1 //------------------------------------------------------------------------------
2 // GB_AxB_saxpy3_fineHash_phase2: C=A*B (or with M packed), fine Hash, phase2
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 {
11 
12     //--------------------------------------------------------------------------
13     // phase2: fine hash task, C(:,j)=A*B(:,j)
14     //--------------------------------------------------------------------------
15 
16     // Given Hf [hash] split into (h,f)
17 
18     // h == 0  , f == 0 : unlocked and unoccupied.
19     // h == i+1, f == 2 : unlocked, occupied by C(i,j).  Hx is initialized.
20     // h == ..., f == 3 : locked.
21 
22     // 0 -> 3 : to lock, if i seen for first time
23     // 2 -> 3 : to lock, if i seen already
24     // 3 -> 2 : to unlock; now i has been seen
25 
26     // The mask M can be optionally checked, if it is dense and checked in
27     // place.  This method is not used if M is present and sparse.
28 
29     if (team_size == 1)
30     {
31 
32         //----------------------------------------------------------------------
33         // single-threaded version
34         //----------------------------------------------------------------------
35 
36         // the hash state 3 is not used, since only a single thread is
37         // doing all the work for this vector C(:,j).
38 
39         // 0: if i seen for first time
40         // 2: if i seen already
41 
42         for ( ; pB < pB_end ; pB++)     // scan B(:,j)
43         {
44             GB_GET_B_kj_INDEX ;         // get index k of B(k,j)
45             GB_GET_A_k ;                // get A(:,k)
46             if (aknz == 0) continue ;
47             GB_GET_B_kj ;               // bkj = B(k,j)
48             // scan A(:,k)
49             for (int64_t pA = pA_start ; pA < pA_end ; pA++)
50             {
51                 GB_GET_A_ik_INDEX ;     // get index i of A(i,j)
52                 #ifdef GB_CHECK_MASK_ij
53                 // check mask condition and skip if C(i,j)
54                 // is protected by the mask
55                 GB_CHECK_MASK_ij ;
56                 #endif
57                 GB_MULT_A_ik_B_kj ;         // t = A(i,k) * B(k,j)
58                 int64_t i_unlocked = ((i+1) << 2) + 2 ;    // (i+1,2)
59                 // find the entry i in the hash table
60                 bool hf_unlocked = false ;  // true if i found
61                 bool hf_empty = false ;     // true if empty slot found
62                 int64_t hash ;
63                 for (hash = GB_HASHF (i) ; ; GB_REHASH (hash,i))
64                 {
65                     int64_t hf = Hf [hash] ;    // grab the entry
66                     hf_unlocked = (hf == i_unlocked) ;
67                     hf_empty = (hf == 0) ;
68                     if (hf_unlocked || hf_empty) break ;
69                 }
70                 if (hf_unlocked)    // if true, update C(i,j)
71                 {
72                     // hash entry occuppied by C(i,j): update it
73                     GB_HX_UPDATE (hash, t) ;    // Hx [hash] += t
74                 }
75                 else // hf_empty:   if true, load the hash entry with C(i,j)
76                 {
77                     // hash entry unoccuppied: fill it with C(i,j)
78                     ASSERT (hf_empty) ;
79                     GB_HX_WRITE (hash, t) ;     // Hx [hash] = t
80                     Hf [hash] = i_unlocked ;    // unlock entry
81                 }
82             }
83         }
84 
85     }
86     else
87     {
88 
89         //----------------------------------------------------------------------
90         // multi-threaded version
91         //----------------------------------------------------------------------
92 
93         for ( ; pB < pB_end ; pB++)     // scan B(:,j)
94         {
95             GB_GET_B_kj_INDEX ;         // get index k of B(k,j)
96             GB_GET_A_k ;                // get A(:,k)
97             if (aknz == 0) continue ;
98             GB_GET_B_kj ;               // bkj = B(k,j)
99             // scan A(:,k)
100             for (int64_t pA = pA_start ; pA < pA_end ; pA++)
101             {
102                 GB_GET_A_ik_INDEX ;     // get index i of A(i,j)
103                 #ifdef GB_CHECK_MASK_ij
104                 // check mask condition and skip if C(i,j)
105                 // is protected by the mask
106                 GB_CHECK_MASK_ij ;
107                 #endif
108                 GB_MULT_A_ik_B_kj ;         // t = A(i,k) * B(k,j)
109                 int64_t i1 = i + 1 ;        // i1 = one-based index
110                 int64_t i_unlocked = (i1 << 2) + 2 ;    // (i+1,2)
111                 for (GB_HASH (i))           // find i in hash table
112                 {
113                     int64_t hf ;
114                     GB_ATOMIC_READ
115                     hf = Hf [hash] ;        // grab the entry
116                     #if GB_HAS_ATOMIC
117                     if (hf == i_unlocked)  // if true, update C(i,j)
118                     {
119                         GB_ATOMIC_UPDATE_HX (hash, t) ;// Hx [.]+=t
120                         break ;         // C(i,j) has been updated
121                     }
122                     #endif
123                     int64_t h = (hf >> 2) ;
124                     if (h == 0 || h == i1)
125                     {
126                         // h=0: unoccupied, h=i1: occupied by i
127                         do  // lock the entry
128                         {
129                             // do this atomically:
130                             // { hf = Hf [hash] ; Hf [hash] |= 3 ; }
131                             GB_ATOMIC_CAPTURE_INT64_OR (hf, Hf [hash], 3) ;
132                         } while ((hf & 3) == 3) ; // owner: f=0 or 2
133 
134                         if (hf == 0) // f == 0
135                         {
136                             // C(i,j) is a new entry in C(:,j)
137                             // Hx [hash] = t
138                             GB_ATOMIC_WRITE_HX (hash, t) ;
139                             GB_ATOMIC_WRITE
140                             Hf [hash] = i_unlocked ; // unlock entry
141                             break ;
142                         }
143                         if (hf == i_unlocked) // f == 2
144                         {
145                             // C(i,j) already appears in C(:,j)
146                             // Hx [hash] += t
147                             GB_ATOMIC_UPDATE_HX (hash, t) ;
148                             GB_ATOMIC_WRITE
149                             Hf [hash] = i_unlocked ; // unlock entry
150                             break ;
151                         }
152 
153                         // hash table occupied, but not with i
154                         GB_ATOMIC_WRITE
155                         Hf [hash] = hf ;  // unlock with prior value
156                     }
157                 }
158             }
159         }
160     }
161 }
162 
163