1 //------------------------------------------------------------------------------
2 // GB_AxB_dot_cij: compute C(i,j) = A(:,i)'*B(:,j)
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // computes C(i,j) = A (:,i)'*B(:,j) via sparse dot product.  This template is
11 // used for all three cases: C=A'*B, C<M>=A'*B, and C<!M>=A'*B in dot2 when C
12 // is bitmap, and for C<M>=A'*B when C and M are sparse or hyper in dot3.
13 
14 // When used as the multiplicative operator, the PAIR operator provides some
15 // useful special cases.  Its output is always one, for any matching pair of
16 // entries A(k,i)'*B(k,j) for some k.  If the monoid is ANY, then C(i,j)=1 if
17 // the intersection for the dot product is non-empty.  This intersection has to
18 // be found, in general.  However, suppose B(:,j) is dense.  Then every entry
19 // in the pattern of A(:,i)' will produce a 1 from the PAIR operator.  If the
20 // monoid is ANY, then C(i,j)=1 if A(:,i)' is nonempty.  If the monoid is PLUS,
21 // then C(i,j) is simply nnz(A(:,i)), assuming no overflow.  The XOR monoid
22 // acts like a 1-bit summation, so the result of the XOR_PAIR_BOOL semiring
23 // will be C(i,j) = mod (nnz(A(:,i)'*B(:,j)),2).
24 
25 // If both A(:,i) and B(:,j) are sparse, then the intersection must still be
26 // found, so these optimizations can be used only if A(:,i) and/or B(:,j) are
27 // entirely populated.
28 
29 // For built-in, pre-generated semirings, the PAIR operator is only coupled
30 // with either the ANY, PLUS, EQ, or XOR monoids, since the other monoids are
31 // equivalent to the ANY monoid.
32 
33 //------------------------------------------------------------------------------
34 // C(i,j) = A(:,i)'*B(:,j): a single dot product
35 //------------------------------------------------------------------------------
36 
37 {
38     int64_t pB = pB_start ;
39 
40     #if ( GB_A_IS_FULL && GB_B_IS_FULL )
41     {
42 
43         //----------------------------------------------------------------------
44         // both A and B are full
45         //----------------------------------------------------------------------
46 
47         #if GB_IS_PAIR_MULTIPLIER
48         {
49             #if GB_IS_ANY_MONOID
50             // ANY monoid: take the first entry found; this sets cij = 1
51             GB_MULT (cij, ignore, ignore, 0, 0, 0) ;
52             #elif GB_IS_EQ_MONOID
53             // EQ_PAIR semiring: all entries are equal to 1
54             cij = 1 ;
55             #elif (GB_CTYPE_BITS > 0)
56             // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(A(:,i)),
57             // for bool, 8-bit, 16-bit, or 32-bit integer
58             cij = (GB_CTYPE) (((uint64_t) vlen) & GB_CTYPE_BITS) ;
59             #else
60             // PLUS monoid for float, double, or 64-bit integers
61             cij = GB_CTYPE_CAST (vlen, 0) ;
62             #endif
63         }
64         #else
65         {
66             // cij = A(0,i) * B(0,j)
67             GB_GETA (aki, Ax, pA) ;             // aki = A(0,i)
68             GB_GETB (bkj, Bx, pB) ;             // bkj = B(0,j)
69             GB_MULT (cij, aki, bkj, i, 0, j) ;  // cij = aki * bkj
70             GB_PRAGMA_SIMD_DOT (cij)
71             for (int64_t k = 1 ; k < vlen ; k++)
72             {
73                 GB_DOT_TERMINAL (cij) ;             // break if cij terminal
74                 // cij += A(k,i) * B(k,j)
75                 GB_GETA (aki, Ax, pA+k) ;           // aki = A(k,i)
76                 GB_GETB (bkj, Bx, pB+k) ;           // bkj = B(k,j)
77                 GB_MULTADD (cij, aki, bkj, i, k, j) ; // cij += aki * bkj
78             }
79         }
80         #endif
81         GB_DOT_ALWAYS_SAVE_CIJ ;
82 
83     }
84     #elif ( GB_A_IS_FULL && GB_B_IS_BITMAP )
85     {
86 
87         //----------------------------------------------------------------------
88         // A is full and B is bitmap
89         //----------------------------------------------------------------------
90 
91         for (int64_t k = 0 ; k < vlen ; k++)
92         {
93             if (Bb [pB+k])
94             {
95                 GB_DOT (k, pA+k, pB+k) ;
96             }
97         }
98         GB_DOT_SAVE_CIJ ;
99 
100     }
101     #elif ( GB_A_IS_FULL && ( GB_B_IS_SPARSE || GB_B_IS_HYPER ) )
102     {
103 
104         //----------------------------------------------------------------------
105         // A is full and B is sparse/hyper
106         //----------------------------------------------------------------------
107 
108         #if GB_IS_PAIR_MULTIPLIER
109         {
110             #if GB_IS_ANY_MONOID
111             // ANY monoid: take the first entry found; this sets cij = 1
112             GB_MULT (cij, ignore, ignore, 0, 0, 0) ;
113             #elif GB_IS_EQ_MONOID
114             // EQ_PAIR semiring: all entries are equal to 1
115             cij = 1 ;
116             #elif (GB_CTYPE_BITS > 0)
117             // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(A(:,i)),
118             // for bool, 8-bit, 16-bit, or 32-bit integer
119             cij = (GB_CTYPE) (((uint64_t) bjnz) & GB_CTYPE_BITS) ;
120             #else
121             // PLUS monoid for float, double, or 64-bit integers
122             cij = GB_CTYPE_CAST (bjnz, 0) ;
123             #endif
124         }
125         #else
126         {
127             int64_t k = Bi [pB] ;               // first row index of B(:,j)
128             // cij = A(k,i) * B(k,j)
129             GB_GETA (aki, Ax, pA+k) ;           // aki = A(k,i)
130             GB_GETB (bkj, Bx, pB  ) ;           // bkj = B(k,j)
131             GB_MULT (cij, aki, bkj, i, k, j) ;  // cij = aki * bkj
132             GB_PRAGMA_SIMD_DOT (cij)
133             for (int64_t p = pB+1 ; p < pB_end ; p++)
134             {
135                 GB_DOT_TERMINAL (cij) ;             // break if cij terminal
136                 int64_t k = Bi [p] ;
137                 // cij += A(k,i) * B(k,j)
138                 GB_GETA (aki, Ax, pA+k) ;           // aki = A(k,i)
139                 GB_GETB (bkj, Bx, p   ) ;           // bkj = B(k,j)
140                 GB_MULTADD (cij, aki, bkj, i, k, j) ;   // cij += aki * bkj
141             }
142         }
143         #endif
144         GB_DOT_ALWAYS_SAVE_CIJ ;
145 
146     }
147     #elif ( GB_A_IS_BITMAP && GB_B_IS_FULL )
148     {
149 
150         //----------------------------------------------------------------------
151         // A is bitmap and B is full
152         //----------------------------------------------------------------------
153 
154         for (int64_t k = 0 ; k < vlen ; k++)
155         {
156             if (Ab [pA+k])
157             {
158                 GB_DOT (k, pA+k, pB+k) ;
159             }
160         }
161         GB_DOT_SAVE_CIJ ;
162 
163     }
164     #elif ( GB_A_IS_BITMAP && GB_B_IS_BITMAP )
165     {
166 
167         //----------------------------------------------------------------------
168         // both A and B are bitmap
169         //----------------------------------------------------------------------
170 
171         for (int64_t k = 0 ; k < vlen ; k++)
172         {
173             if (Ab [pA+k] && Bb [pB+k])
174             {
175                 GB_DOT (k, pA+k, pB+k) ;
176             }
177         }
178         GB_DOT_SAVE_CIJ ;
179 
180     }
181     #elif ( GB_A_IS_BITMAP && ( GB_B_IS_SPARSE || GB_B_IS_HYPER ) )
182     {
183 
184         //----------------------------------------------------------------------
185         // A is bitmap and B is sparse/hyper
186         //----------------------------------------------------------------------
187 
188         for (int64_t p = pB ; p < pB_end ; p++)
189         {
190             int64_t k = Bi [p] ;
191             if (Ab [pA+k])
192             {
193                 GB_DOT (k, pA+k, p) ;
194             }
195         }
196         GB_DOT_SAVE_CIJ ;
197 
198     }
199     #elif ( (GB_A_IS_SPARSE || GB_A_IS_HYPER) && GB_B_IS_FULL )
200     {
201 
202         //----------------------------------------------------------------------
203         // A is sparse/hyper and B is full
204         //----------------------------------------------------------------------
205 
206         #if GB_IS_PAIR_MULTIPLIER
207         {
208             #if GB_IS_ANY_MONOID
209             // ANY monoid: take the first entry found; this sets cij = 1
210             GB_MULT (cij, ignore, ignore, 0, 0, 0) ;
211             #elif GB_IS_EQ_MONOID
212             // EQ_PAIR semiring: all entries are equal to 1
213             cij = 1 ;
214             #elif (GB_CTYPE_BITS > 0)
215             // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(A(:,i)),
216             // for bool, 8-bit, 16-bit, or 32-bit integer
217             cij = (GB_CTYPE) (((uint64_t) ainz) & GB_CTYPE_BITS) ;
218             #else
219             // PLUS monoid for float, double, or 64-bit integers
220             cij = GB_CTYPE_CAST (ainz, 0) ;
221             #endif
222         }
223         #else
224         {
225             int64_t k = Ai [pA] ;               // first row index of A(:,i)
226             // cij = A(k,i) * B(k,j)
227             GB_GETA (aki, Ax, pA  ) ;           // aki = A(k,i)
228             GB_GETB (bkj, Bx, pB+k) ;           // bkj = B(k,j)
229             GB_MULT (cij, aki, bkj, i, k, j) ;  // cij = aki * bkj
230             GB_PRAGMA_SIMD_DOT (cij)
231             for (int64_t p = pA+1 ; p < pA_end ; p++)
232             {
233                 GB_DOT_TERMINAL (cij) ;             // break if cij terminal
234                 int64_t k = Ai [p] ;
235                 // cij += A(k,i) * B(k,j)
236                 GB_GETA (aki, Ax, p   ) ;           // aki = A(k,i)
237                 GB_GETB (bkj, Bx, pB+k) ;           // bkj = B(k,j)
238                 GB_MULTADD (cij, aki, bkj, i, k, j) ;   // cij += aki * bkj
239             }
240         }
241         #endif
242         GB_DOT_ALWAYS_SAVE_CIJ ;
243 
244     }
245     #elif ( (GB_A_IS_SPARSE || GB_A_IS_HYPER) && GB_B_IS_BITMAP )
246     {
247 
248         //----------------------------------------------------------------------
249         // A is sparse/hyper and B is bitmap
250         //----------------------------------------------------------------------
251 
252         for (int64_t p = pA ; p < pA_end ; p++)
253         {
254             int64_t k = Ai [p] ;
255             if (Bb [pB+k])
256             {
257                 GB_DOT (k, p, pB+k) ;
258             }
259         }
260         GB_DOT_SAVE_CIJ ;
261 
262     }
263     #else
264     {
265 
266         //----------------------------------------------------------------------
267         // both A and B are sparse/hyper
268         //----------------------------------------------------------------------
269 
270 
271         if (Ai [pA_end-1] < ib_first || ib_last < Ai [pA])
272         {
273 
274             //------------------------------------------------------------------
275             // pattern of A(:,i) and B(:,j) don't overlap; C(i,j) doesn't exist
276             //------------------------------------------------------------------
277 
278             ASSERT (!GB_CIJ_EXISTS) ;
279 
280         }
281         else if (ainz > 8 * bjnz)
282         {
283 
284             //------------------------------------------------------------------
285             // B(:,j) is very sparse compared to A(:,i)
286             //------------------------------------------------------------------
287 
288             while (pA < pA_end && pB < pB_end)
289             {
290                 int64_t ia = Ai [pA] ;
291                 int64_t ib = Bi [pB] ;
292                 if (ia < ib)
293                 {
294                     // A(ia,i) appears before B(ib,j)
295                     // discard all entries A(ia:ib-1,i)
296                     int64_t pleft = pA + 1 ;
297                     int64_t pright = pA_end - 1 ;
298                     GB_TRIM_BINARY_SEARCH (ib, Ai, pleft, pright) ;
299                     ASSERT (pleft > pA) ;
300                     pA = pleft ;
301                 }
302                 else if (ib < ia)
303                 {
304                     // B(ib,j) appears before A(ia,i)
305                     pB++ ;
306                 }
307                 else // ia == ib == k
308                 {
309                     // A(k,i) and B(k,j) are the next entries to merge
310                     GB_DOT (ia, pA, pB) ;
311                     pA++ ;
312                     pB++ ;
313                 }
314             }
315             GB_DOT_SAVE_CIJ ;
316 
317         }
318         else if (bjnz > 8 * ainz)
319         {
320 
321             //------------------------------------------------------------------
322             // A(:,i) is very sparse compared to B(:,j)
323             //------------------------------------------------------------------
324 
325             while (pA < pA_end && pB < pB_end)
326             {
327                 int64_t ia = Ai [pA] ;
328                 int64_t ib = Bi [pB] ;
329                 if (ia < ib)
330                 {
331                     // A(ia,i) appears before B(ib,j)
332                     pA++ ;
333                 }
334                 else if (ib < ia)
335                 {
336                     // B(ib,j) appears before A(ia,i)
337                     // discard all entries B(ib:ia-1,j)
338                     int64_t pleft = pB + 1 ;
339                     int64_t pright = pB_end - 1 ;
340                     GB_TRIM_BINARY_SEARCH (ia, Bi, pleft, pright) ;
341                     ASSERT (pleft > pB) ;
342                     pB = pleft ;
343                 }
344                 else // ia == ib == k
345                 {
346                     // A(k,i) and B(k,j) are the next entries to merge
347                     GB_DOT (ia, pA, pB) ;
348                     pA++ ;
349                     pB++ ;
350                 }
351             }
352             GB_DOT_SAVE_CIJ ;
353 
354         }
355         else
356         {
357 
358             //------------------------------------------------------------------
359             // A(:,i) and B(:,j) have about the same sparsity
360             //------------------------------------------------------------------
361 
362             while (pA < pA_end && pB < pB_end)
363             {
364                 int64_t ia = Ai [pA] ;
365                 int64_t ib = Bi [pB] ;
366                 if (ia < ib)
367                 {
368                     // A(ia,i) appears before B(ib,j)
369                     pA++ ;
370                 }
371                 else if (ib < ia)
372                 {
373                     // B(ib,j) appears before A(ia,i)
374                     pB++ ;
375                 }
376                 else // ia == ib == k
377                 {
378                     // A(k,i) and B(k,j) are the next entries to merge
379                     GB_DOT (ia, pA, pB) ;
380                     pA++ ;
381                     pB++ ;
382                 }
383             }
384             GB_DOT_SAVE_CIJ ;
385         }
386     }
387     #endif
388 }
389 
390