1 //------------------------------------------------------------------------------
2 // GB_AxB_saxpy3_coarseGus_M_phase5: C<M>=A*B, coarse Gustavson, phase5
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 {
11 
12     //--------------------------------------------------------------------------
13     // phase5: coarse Gustavson task, C<M>=A*B
14     //--------------------------------------------------------------------------
15 
16     // Initially, Hf [...] < mark for all of Hf.
17 
18     // Hf [i] < mark    : M(i,j)=0, C(i,j) is ignored.
19     // Hf [i] == mark   : M(i,j)=1, and C(i,j) not yet seen.
20     // Hf [i] == mark+1 : M(i,j)=1, and C(i,j) has been seen.
21 
22     for (int64_t kk = kfirst ; kk <= klast ; kk++)
23     {
24         int64_t pC = Cp [kk] ;
25         int64_t cjnz = Cp [kk+1] - pC ;
26         if (cjnz == 0) continue ;   // nothing to do
27         GB_GET_B_j ;                // get B(:,j)
28 
29         #ifndef GB_GENERIC
30         if (cjnz == cvlen)          // C(:,j) is dense
31         {
32             // This is not used for the generic saxpy3.
33             GB_COMPUTE_DENSE_C_j ;  // C(:,j) = A*B(:,j)
34             continue ;
35         }
36         #endif
37 
38         GB_GET_M_j ;            // get M(:,j)
39         GB_GET_M_j_RANGE (64) ; // get first and last in M(:,j)
40         mark += 2 ;
41         int64_t mark1 = mark+1 ;
42 
43         // scatter M(:,j) into the Gustavson workspace
44         GB_SCATTER_M_j (pM_start, pM_end, mark) ;
45 
46         if (16 * cjnz > cvlen)
47         {
48 
49             //------------------------------------------------------------------
50             // C(:,j) is not very sparse
51             //------------------------------------------------------------------
52 
53             for ( ; pB < pB_end ; pB++)     // scan B(:,j)
54             {
55                 GB_GET_B_kj_INDEX ;         // get k of B(k,j)
56                 GB_GET_A_k ;                // get A(:,k)
57                 if (aknz == 0) continue ;
58                 GB_GET_B_kj ;               // bkj = B(k,j)
59                 #define GB_IKJ                                 \
60                 {                                              \
61                     int64_t hf = Hf [i] ;                      \
62                     if (hf == mark)                            \
63                     {                                          \
64                         /* C(i,j) = A(i,k) * B(k,j) */         \
65                         Hf [i] = mark1 ;     /* mark as seen */\
66                         GB_MULT_A_ik_B_kj ;  /* t = aik*bkj */ \
67                         GB_HX_WRITE (i, t) ; /* Hx [i] = t */  \
68                     }                                          \
69                     else if (hf == mark1)                      \
70                     {                                          \
71                         /* C(i,j) += A(i,k) * B(k,j) */        \
72                         GB_MULT_A_ik_B_kj ;  /* t = aik*bkj */ \
73                         GB_HX_UPDATE (i, t) ;/* Hx [i] += t */ \
74                     }                                          \
75                 }
76                 GB_SCAN_M_j_OR_A_k (A_ok_for_binary_search) ;
77                 #undef GB_IKJ
78             }
79             GB_GATHER_ALL_C_j(mark1) ;  // gather into C(:,j)
80 
81         }
82         else
83         {
84 
85             //------------------------------------------------------------------
86             // C(:,j) is very sparse
87             //------------------------------------------------------------------
88 
89             for ( ; pB < pB_end ; pB++)     // scan B(:,j)
90             {
91                 GB_GET_B_kj_INDEX ;         // get k of B(k,j)
92                 GB_GET_A_k ;                // get A(:,k)
93                 if (aknz == 0) continue ;
94                 GB_GET_B_kj ;               // bkj = B(k,j)
95                 #define GB_IKJ                                 \
96                 {                                              \
97                     int64_t hf = Hf [i] ;                      \
98                     if (hf == mark)                            \
99                     {                                          \
100                         /* C(i,j) = A(i,k) * B(k,j) */         \
101                         Hf [i] = mark1 ;     /* mark as seen */\
102                         GB_MULT_A_ik_B_kj ;  /* t = aik*bkj */ \
103                         GB_HX_WRITE (i, t) ; /* Hx [i] = t */  \
104                         Ci [pC++] = i ; /* C(:,j) pattern */   \
105                     }                                          \
106                     else if (hf == mark1)                      \
107                     {                                          \
108                         /* C(i,j) += A(i,k) * B(k,j) */        \
109                         GB_MULT_A_ik_B_kj ;  /* t = aik*bkj */ \
110                         GB_HX_UPDATE (i, t) ;/* Hx [i] += t */ \
111                     }                                          \
112                 }
113                 GB_SCAN_M_j_OR_A_k (A_ok_for_binary_search) ;
114                 #undef GB_IKJ
115             }
116             GB_SORT_AND_GATHER_C_j ;    // gather into C(:,j)
117         }
118     }
119 }
120 
121