1 //------------------------------------------------------------------------------
2 // GB_AxB_saxpy3_fineGus_M_phase2: C<M>=A*B, fine Gustavson, phase2
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 {
11 
12     //--------------------------------------------------------------------------
13     // phase2: fine Gustavson task, C(:,j)<M(:,j)>=A*B(:,j)
14     //--------------------------------------------------------------------------
15 
16     // Hf [i] is 0 if M(i,j) not present or M(i,j)=0.
17     // 0 -> 1 : has already been done in phase0 if M(i,j)=1.
18 
19     // 0 -> 0 : to ignore, if M(i,j)=0
20     // 1 -> 3 : to lock, if i seen for first time
21     // 2 -> 3 : to lock, if i seen already
22     // 3 -> 2 : to unlock; now i has been seen
23 
24     GB_GET_M_j ;                // get M(:,j)
25     GB_GET_M_j_RANGE (16) ;     // get first and last in M(:,j)
26     for ( ; pB < pB_end ; pB++)     // scan B(:,j)
27     {
28         GB_GET_B_kj_INDEX ;         // get index k of B(k,j)
29         GB_GET_A_k ;                // get A(:,k)
30         if (aknz == 0) continue ;
31         GB_GET_B_kj ;               // bkj = B(k,j)
32 
33         #if GB_IS_ANY_MONOID
34 
35             //------------------------------------------------------------------
36             // C(i,j) += A(i,k)*B(k,j) ; with the ANY monoid
37             //------------------------------------------------------------------
38 
39             #define GB_IKJ                                         \
40                 int8_t f ;                                         \
41                 GB_ATOMIC_READ                                     \
42                 f = Hf [i] ;            /* grab the entry */       \
43                 if (f == 0 || f == 2) continue ;                   \
44                 GB_ATOMIC_WRITE                                    \
45                 Hf [i] = 2 ;            /* unlock the entry */     \
46                 GB_MULT_A_ik_B_kj ;     /* t = A(i,k) * B(k,j) */  \
47                 GB_ATOMIC_WRITE_HX (i, t) ;    /* Hx [i] = t */
48 
49         #else
50 
51             //------------------------------------------------------------------
52             // C(i,j) += A(i,k)*B(k,j) ; all other monoids
53             //------------------------------------------------------------------
54 
55             #define GB_IKJ                                         \
56             {                                                      \
57                 GB_MULT_A_ik_B_kj ;     /* t = A(i,k) * B(k,j) */  \
58                 int8_t f ;                                         \
59                 GB_ATOMIC_READ                                     \
60                 f = Hf [i] ;            /* grab the entry */       \
61                 if (GB_HAS_ATOMIC && (f == 2))                     \
62                 {                                                  \
63                     /* C(i,j) already seen; update it */           \
64                     GB_ATOMIC_UPDATE_HX (i, t) ; /* Hx [i] += t */ \
65                     continue ;       /* C(i,j) has been updated */ \
66                 }                                                  \
67                 if (f == 0) continue ; /* M(i,j)=0; ignore C(i,j)*/\
68                 do  /* lock the entry */                           \
69                 {                                                  \
70                     /* do this atomically: */                      \
71                     /* { f = Hf [i] ; Hf [i] = 3 ; } */            \
72                     GB_ATOMIC_CAPTURE_INT8 (f, Hf [i], 3) ;        \
73                 } while (f == 3) ; /* lock owner gets f=1 or 2 */  \
74                 if (f == 1)                                        \
75                 {                                                  \
76                     /* C(i,j) is a new entry */                    \
77                     GB_ATOMIC_WRITE_HX (i, t) ; /* Hx [i] = t */   \
78                 }                                                  \
79                 else /* f == 2 */                                  \
80                 {                                                  \
81                     /* C(i,j) already appears in C(:,j) */         \
82                     GB_ATOMIC_UPDATE_HX (i, t) ; /* Hx [i] += t */ \
83                 }                                                  \
84                 GB_ATOMIC_WRITE                                    \
85                 Hf [i] = 2 ;                /* unlock the entry */ \
86             }
87 
88         #endif
89 
90         GB_SCAN_M_j_OR_A_k (A_ok_for_binary_search) ;
91         #undef GB_IKJ
92     }
93 }
94 
95