1 //------------------------------------------------------------------------------
2 // GB_AxB_saxpy3_coarseGus_noM_phase5: numeric coarse Gustavson, no mask
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 {
11     for (int64_t kk = kfirst ; kk <= klast ; kk++)
12     {
13 
14         //----------------------------------------------------------------------
15         // get C(:,j) and B(:,j)
16         //----------------------------------------------------------------------
17 
18         int64_t pC = Cp [kk] ;
19         int64_t cjnz = Cp [kk+1] - pC ;
20         if (cjnz == 0) continue ;           // no work to do if C(:,j) empty
21         GB_GET_B_j ;
22 
23         //----------------------------------------------------------------------
24         // special case when C (:,j) is dense
25         //----------------------------------------------------------------------
26 
27         #ifndef GB_GENERIC
28         if (cjnz == cvlen)          // C(:,j) is dense
29         {
30             // This is not used for the generic saxpy3.
31             GB_COMPUTE_DENSE_C_j ;  // C(:,j) = A*B(:,j)
32             continue ;
33         }
34         #endif
35 
36         //----------------------------------------------------------------------
37         // C(:,j) = A*B(:,j)
38         //----------------------------------------------------------------------
39 
40         mark++ ;
41         if (bjnz == 1 && (A_is_sparse || A_is_hyper))
42         {
43 
44             //------------------------------------------------------------------
45             // C(:,j) = A(:,k)*B(k,j) where B(:,j) has a single entry
46             //------------------------------------------------------------------
47 
48             GB_COMPUTE_C_j_WHEN_NNZ_B_j_IS_ONE ;
49 
50         }
51         else if (16 * cjnz > cvlen)
52         {
53 
54             //------------------------------------------------------------------
55             // C(:,j) is not very sparse
56             //------------------------------------------------------------------
57 
58             for ( ; pB < pB_end ; pB++)     // scan B(:,j)
59             {
60                 GB_GET_B_kj_INDEX ;             // get index k of entry B(k,j)
61                 GB_GET_A_k ;                    // get A(:,k)
62                 if (aknz == 0) continue ;       // skip if A(:,k) is empty
63                 GB_GET_B_kj ;                   // bkj = B(k,j)
64                 // scan A(:,k)
65                 for (int64_t pA = pA_start ; pA < pA_end ; pA++)
66                 {
67                     GB_GET_A_ik_INDEX ;         // get index i of entry A(i,k)
68                     GB_MULT_A_ik_B_kj ;         // t = A(i,k)*B(k,j)
69                     if (Hf [i] != mark)
70                     {
71                         // C(i,j) = A(i,k) * B(k,j)
72                         Hf [i] = mark ;
73                         GB_HX_WRITE (i, t) ;    // Hx [i] = t
74                     }
75                     else
76                     {
77                         // C(i,j) += A(i,k) * B(k,j)
78                         GB_HX_UPDATE (i, t) ;   // Hx [i] += t
79                     }
80                 }
81             }
82             GB_GATHER_ALL_C_j (mark) ;          // gather into C(:,j)
83 
84         }
85         else
86         {
87 
88             //------------------------------------------------------------------
89             // C(:,j) is very sparse
90             //------------------------------------------------------------------
91 
92             for ( ; pB < pB_end ; pB++)     // scan B(:,j)
93             {
94                 GB_GET_B_kj_INDEX ;             // get index k of entry B(k,j)
95                 GB_GET_A_k ;                    // get A(:,k)
96                 if (aknz == 0) continue ;       // skip if A(:,k) is empty
97                 GB_GET_B_kj ;                   // bkj = B(k,j)
98                 // scan A(:,k)
99                 for (int64_t pA = pA_start ; pA < pA_end ; pA++)
100                 {
101                     GB_GET_A_ik_INDEX ;         // get index i of entry A(i,k)
102                     GB_MULT_A_ik_B_kj ;         // t = A(i,k)*B(k,j)
103                     if (Hf [i] != mark)
104                     {
105                         // C(i,j) = A(i,k) * B(k,j)
106                         Hf [i] = mark ;
107                         GB_HX_WRITE (i, t) ;    // Hx [i] = t
108                         Ci [pC++] = i ;
109                     }
110                     else
111                     {
112                         // C(i,j) += A(i,k) * B(k,j)
113                         GB_HX_UPDATE (i, t) ;   // Hx [i] += t
114                     }
115                 }
116             }
117             GB_SORT_AND_GATHER_C_j ;            // gather into C(:,j)
118         }
119     }
120 }
121 
122