1 //------------------------------------------------------------------------------
2 // GB_AxB_saxpy_generic: compute C=A*B, C<M>=A*B, or C<!M>=A*B in parallel
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // GB_AxB_saxpy_generic computes C=A*B, C<M>=A*B, or C<!M>=A*B in parallel,
11 // with arbitrary types and operators.  C can have any sparsity pattern:
12 // hyper, sparse, bitmap, or full.  For all cases, the four matrices C, M
13 // (if present), A, and B have the same format (by-row or by-column), or they
14 // represent implicitly transposed matrices with the same effect.  This method
15 // does not handle the dot-product methods, which compute C=A'*B if A and B
16 // are held by column, or equivalently A*B' if both are held by row.
17 
18 // This method uses GB_AxB_saxpy_template.c to implement two meta-methods,
19 // each of which can contain further specialized methods (such as the fine/
20 // coarse x Gustavson/Hash, mask/no-mask methods in saxpy3):
21 
22 // saxpy3: general purpose method, where C is sparse or hypersparse,
23 //          via GB_AxB_saxpy3_template.c.  SaxpyTasks holds the (fine/coarse x
24 //          Gustavson/Hash) tasks constructed by GB_AxB_saxpy3_slice*.
25 
26 // bitmap_saxpy: general purpose method, where C is bitmap or full, via
27 //          GB_bitmap_AxB_saxpy_template.c.  The method constructs its own
28 //          tasks in workspace defined and freed in that template.
29 
30 //------------------------------------------------------------------------------
31 
32 #include "GB_mxm.h"
33 #include "GB_ek_slice.h"
34 #include "GB_binop.h"
35 #include "GB_bracket.h"
36 #include "GB_sort.h"
37 #include "GB_atomics.h"
38 #include "GB_ek_slice_search.c"
39 #include "GB_bitmap_assign_methods.h"
40 
GB_AxB_saxpy_generic(GrB_Matrix C,const GrB_Matrix M,bool Mask_comp,const bool Mask_struct,const bool M_packed_in_place,const GrB_Matrix A,bool A_is_pattern,const GrB_Matrix B,bool B_is_pattern,const GrB_Semiring semiring,const bool flipxy,const int saxpy_method,GB_saxpy3task_struct * restrict SaxpyTasks,int ntasks,int nfine,int nthreads,const int do_sort,GB_Context Context)41 GrB_Info GB_AxB_saxpy_generic
42 (
43     GrB_Matrix C,                   // any sparsity
44     const GrB_Matrix M,
45     bool Mask_comp,
46     const bool Mask_struct,
47     const bool M_packed_in_place,   // ignored if C is bitmap
48     const GrB_Matrix A,
49     bool A_is_pattern,
50     const GrB_Matrix B,
51     bool B_is_pattern,
52     const GrB_Semiring semiring,    // semiring that defines C=A*B
53     const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
54     const int saxpy_method,         // saxpy3 or bitmap method
55     // for saxpy3 only:
56     GB_saxpy3task_struct *restrict SaxpyTasks, // NULL if C is bitmap
57     int ntasks,
58     int nfine,
59     int nthreads,
60     const int do_sort,              // if true, sort in saxpy3
61     GB_Context Context
62 )
63 {
64 
65     //--------------------------------------------------------------------------
66     // get operators, functions, workspace, contents of A, B, and C
67     //--------------------------------------------------------------------------
68 
69     GrB_BinaryOp mult = semiring->multiply ;
70     GrB_Monoid add = semiring->add ;
71     GB_void *identity = (GB_void *) add->identity ;
72     ASSERT (mult->ztype == add->op->ztype) ;
73     ASSERT (mult->ztype == C->type) ;
74 
75     GxB_binary_function fmult = mult->function ;    // NULL if positional
76     GxB_binary_function fadd  = add->op->function ;
77     GB_Opcode opcode = mult->opcode ;
78     bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
79 
80     size_t csize = C->type->size ;
81     size_t asize = A_is_pattern ? 0 : A->type->size ;
82     size_t bsize = B_is_pattern ? 0 : B->type->size ;
83 
84     size_t xsize = mult->xtype->size ;
85     size_t ysize = mult->ytype->size ;
86 
87     // scalar workspace: because of typecasting, the x/y types need not
88     // be the same as the size of the A and B types.
89     // flipxy false: aik = (xtype) A(i,k) and bkj = (ytype) B(k,j)
90     // flipxy true:  aik = (ytype) A(i,k) and bkj = (xtype) B(k,j)
91     size_t aik_size = flipxy ? ysize : xsize ;
92     size_t bkj_size = flipxy ? xsize : ysize ;
93 
94     GB_cast_function cast_A, cast_B ;
95     if (flipxy)
96     {
97         // A is typecasted to y, and B is typecasted to x
98         cast_A = A_is_pattern ? NULL :
99                  GB_cast_factory (mult->ytype->code, A->type->code) ;
100         cast_B = B_is_pattern ? NULL :
101                  GB_cast_factory (mult->xtype->code, B->type->code) ;
102     }
103     else
104     {
105         // A is typecasted to x, and B is typecasted to y
106         cast_A = A_is_pattern ? NULL :
107                  GB_cast_factory (mult->xtype->code, A->type->code) ;
108         cast_B = B_is_pattern ? NULL :
109                  GB_cast_factory (mult->ytype->code, B->type->code) ;
110     }
111 
112     //--------------------------------------------------------------------------
113     // C = A*B via saxpy3 or bitmap method, function pointers, and typecasting
114     //--------------------------------------------------------------------------
115 
116     // memcpy (&(Cx [pC]), &(Hx [i]), len*csize)
117     #define GB_CIJ_MEMCPY(pC,i,len) memcpy (GB_CX (pC), GB_HX (i), (len)*csize)
118 
119     // atomic update not available for function pointers
120     #define GB_HAS_ATOMIC 0
121 
122     // user-defined monoid update cannot be done with an OpenMP atomic
123     #define GB_HAS_OMP_ATOMIC 0
124 
125     // no special cases
126     #define GB_IS_ANY_MONOID 0
127     #define GB_IS_ANY_FC32_MONOID 0
128     #define GB_IS_ANY_FC64_MONOID 0
129     #define GB_IS_PLUS_FC32_MONOID 0
130     #define GB_IS_PLUS_FC64_MONOID 0
131     #define GB_IS_ANY_PAIR_SEMIRING 0
132     #define GB_IS_PAIR_MULTIPLIER 0
133 
134     // a generic semiring does not have a concise bitmap multiply-add statement
135     #define GB_HAS_BITMAP_MULTADD 0
136     #define GB_XINIT ;
137     #define GB_XLOAD(bkj) ;
138 
139     #define GB_ATYPE GB_void
140     #define GB_BTYPE GB_void
141     #define GB_ASIZE asize
142     #define GB_BSIZE bsize
143 
144     // no vectorization
145     #define GB_PRAGMA_SIMD_VECTORIZE ;
146 
147     // The monoid identity byte value is not used in saxpy3
148     #define GB_GENERIC
149     #define GB_HAS_IDENTITY_BYTE 0
150     #define GB_IDENTITY_BYTE (none)
151 
152     // definitions for GB_AxB_saxpy_template.c
153     #include "GB_AxB_saxpy3_template.h"
154 
155     if (op_is_positional)
156     {
157 
158         //----------------------------------------------------------------------
159         // generic semirings with positional mulitiply operators
160         //----------------------------------------------------------------------
161 
162         GB_BURBLE_MATRIX (C, "(generic positional C=A*B) ") ;
163 
164         if (flipxy)
165         {
166             // flip a positional multiplicative operator
167             bool handled ;
168             opcode = GB_binop_flip (opcode, &handled) ;  // for positional ops
169             ASSERT (handled) ;      // all positional ops can be flipped
170         }
171 
172         // C always has type int64_t or int32_t.  The monoid must be used via
173         // its function pointer.  The positional multiply operator must be
174         // hard-coded since it has no function pointer.  The numerical values
175         // and types of A and B are not accessed.
176 
177         ASSERT (A_is_pattern) ;
178         ASSERT (B_is_pattern) ;
179 
180         // aik = A(i,k), located in Ax [pA], value not used
181         #define GB_GETA(aik,Ax,pA) ;
182 
183         // bkj = B(k,j), located in Bx [pB], value not used
184         #define GB_GETB(bkj,Bx,pB) ;
185 
186         // Gx [pG] = A(i,k), located in Ax [pA], value not used
187         #define GB_LOADA(Gx,pG,Ax,pA) ;
188 
189         // Gx [pG] = B(k,j), located in Bx [pB], value not used
190         #define GB_LOADB(Gx,pG,Bx,pB) ;
191 
192         // define t for each task
193         #define GB_CIJ_DECLARE(t) GB_CTYPE t
194 
195         // address of Cx [p]
196         #define GB_CX(p) (&Cx [p])
197 
198         // Cx [p] = t
199         #define GB_CIJ_WRITE(p,t) Cx [p] = t
200 
201         // address of Hx [i]
202         #define GB_HX(i) (&Hx [i])
203 
204         // Hx [i] = t
205         #define GB_HX_WRITE(i,t) Hx [i] = t
206 
207         // Hx [i] = identity
208         #define GB_HX_CLEAR(i) memcpy (GB_HX (i), identity, csize)
209 
210         // Cx [p] = Hx [i]
211         #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
212 
213         // Cx [p] += Hx [i]
214         #define GB_CIJ_GATHER_UPDATE(p,i) fadd (GB_CX (p), GB_CX (p), GB_HX (i))
215 
216         // Cx [p] += t
217         #define GB_CIJ_UPDATE(p,t) fadd (GB_CX (p), GB_CX (p), &t)
218 
219         // Hx [i] += t
220         #define GB_HX_UPDATE(i,t) fadd (GB_HX (i), GB_HX (i), &t)
221 
222         int64_t offset = GB_positional_offset (opcode) ;
223 
224         if (mult->ztype == GrB_INT64)
225         {
226             #undef  GB_CTYPE
227             #define GB_CTYPE int64_t
228             #undef  GB_CSIZE
229             #define GB_CSIZE (sizeof (int64_t))
230             ASSERT (C->type == GrB_INT64) ;
231             ASSERT (csize == sizeof (int64_t)) ;
232             switch (opcode)
233             {
234                 case GB_FIRSTI_opcode   :   // z = first_i(A(i,k),y) == i
235                 case GB_FIRSTI1_opcode  :   // z = first_i1(A(i,k),y) == i+1
236                     #undef  GB_MULT
237                     #define GB_MULT(t, aik, bkj, i, k, j) t = i + offset
238                     #include "GB_AxB_saxpy_template.c"
239                     break ;
240                 case GB_FIRSTJ_opcode   :   // z = first_j(A(i,k),y) == k
241                 case GB_FIRSTJ1_opcode  :   // z = first_j1(A(i,k),y) == k+1
242                 case GB_SECONDI_opcode  :   // z = second_i(x,B(k,j)) == k
243                 case GB_SECONDI1_opcode :   // z = second_i1(x,B(k,j)) == k+1
244                     #undef  GB_MULT
245                     #define GB_MULT(t, aik, bkj, i, k, j) t = k + offset
246                     #include "GB_AxB_saxpy_template.c"
247                     break ;
248                 case GB_SECONDJ_opcode  :   // z = second_j(x,B(k,j)) == j
249                 case GB_SECONDJ1_opcode :   // z = second_j1(x,B(k,j)) == j+1
250                     #undef  GB_MULT
251                     #define GB_MULT(t, aik, bkj, i, k, j) t = j + offset
252                     #include "GB_AxB_saxpy_template.c"
253                     break ;
254                 default: ;
255             }
256         }
257         else
258         {
259             #undef  GB_CTYPE
260             #define GB_CTYPE int32_t
261             #undef  GB_CSIZE
262             #define GB_CSIZE (sizeof (int32_t))
263             ASSERT (C->type == GrB_INT32) ;
264             ASSERT (csize == sizeof (int32_t)) ;
265             switch (opcode)
266             {
267                 case GB_FIRSTI_opcode   :   // z = first_i(A(i,k),y) == i
268                 case GB_FIRSTI1_opcode  :   // z = first_i1(A(i,k),y) == i+1
269                     #undef  GB_MULT
270                     #define GB_MULT(t,aik,bkj,i,k,j) t = (int32_t) (i + offset)
271                     #include "GB_AxB_saxpy_template.c"
272                     break ;
273                 case GB_FIRSTJ_opcode   :   // z = first_j(A(i,k),y) == k
274                 case GB_FIRSTJ1_opcode  :   // z = first_j1(A(i,k),y) == k+1
275                 case GB_SECONDI_opcode  :   // z = second_i(x,B(k,j)) == k
276                 case GB_SECONDI1_opcode :   // z = second_i1(x,B(k,j)) == k+1
277                     #undef  GB_MULT
278                     #define GB_MULT(t,aik,bkj,i,k,j) t = (int32_t) (k + offset)
279                     #include "GB_AxB_saxpy_template.c"
280                     break ;
281                 case GB_SECONDJ_opcode  :   // z = second_j(x,B(k,j)) == j
282                 case GB_SECONDJ1_opcode :   // z = second_j1(x,B(k,j)) == j+1
283                     #undef  GB_MULT
284                     #define GB_MULT(t,aik,bkj,i,k,j) t = (int32_t) (j + offset)
285                     #include "GB_AxB_saxpy_template.c"
286                     break ;
287                 default: ;
288             }
289         }
290 
291     }
292     else
293     {
294 
295         //----------------------------------------------------------------------
296         // generic semirings with standard multiply operators
297         //----------------------------------------------------------------------
298 
299         GB_BURBLE_MATRIX (C, "(generic C=A*B) ") ;
300 
301         // aik = A(i,k), located in Ax [pA]
302         #undef  GB_GETA
303         #define GB_GETA(aik,Ax,pA)                                          \
304             GB_void aik [GB_VLA(aik_size)] ;                                \
305             if (!A_is_pattern) cast_A (aik, Ax +((pA)*asize), asize)
306 
307         // bkj = B(k,j), located in Bx [pB]
308         #undef  GB_GETB
309         #define GB_GETB(bkj,Bx,pB)                                          \
310             GB_void bkj [GB_VLA(bkj_size)] ;                                \
311             if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize)
312 
313         // Gx [pG] = A(i,k), located in Ax [pA], no typecasting
314         #undef  GB_LOADA
315         #define GB_LOADA(Gx,pG,Ax,pA)                                       \
316             memcpy (Gx + ((pG)*asize), Ax +((pA)*asize), asize)
317 
318         // Gx [pG] = B(k,j), located in Bx [pB], no typecasting
319         #undef  GB_LOADB
320         #define GB_LOADB(Gx,pG,Bx,pB)                                       \
321             memcpy (Gx + ((pG)*bsize), Bx +((pB)*bsize), bsize)
322 
323         // define t for each task
324         #undef  GB_CIJ_DECLARE
325         #define GB_CIJ_DECLARE(t) GB_void t [GB_VLA(csize)]
326 
327         // address of Cx [p]
328         #undef  GB_CX
329         #define GB_CX(p) (Cx +((p)*csize))
330 
331         // Cx [p] = t
332         #undef  GB_CIJ_WRITE
333         #define GB_CIJ_WRITE(p,t) memcpy (GB_CX (p), t, csize)
334 
335         // address of Hx [i]
336         #undef  GB_HX
337         #define GB_HX(i) (Hx +((i)*csize))
338 
339         // Hx [i] = t
340         #undef  GB_HX_WRITE
341         #define GB_HX_WRITE(i,t) memcpy (GB_HX (i), t, csize)
342 
343         // Cx [p] = Hx [i]
344         #undef  GB_CIJ_GATHER
345         #define GB_CIJ_GATHER(p,i) memcpy (GB_CX (p), GB_HX(i), csize)
346 
347         // Cx [p] += Hx [i]
348         #undef  GB_CIJ_GATHER_UPDATE
349         #define GB_CIJ_GATHER_UPDATE(p,i) fadd (GB_CX (p), GB_CX (p), GB_HX (i))
350 
351         // Cx [p] += t
352         #undef  GB_CIJ_UPDATE
353         #define GB_CIJ_UPDATE(p,t) fadd (GB_CX (p), GB_CX (p), t)
354 
355         // Hx [i] += t
356         #undef  GB_HX_UPDATE
357         #define GB_HX_UPDATE(i,t) fadd (GB_HX (i), GB_HX (i), t)
358 
359         #undef  GB_CTYPE
360         #define GB_CTYPE GB_void
361 
362         #undef  GB_CSIZE
363         #define GB_CSIZE csize
364 
365         if (opcode == GB_FIRST_opcode || opcode == GB_SECOND_opcode)
366         {
367             // fmult is not used and can be NULL (for user-defined types)
368             if (flipxy)
369             {
370                 // flip first and second
371                 bool handled ;
372                 opcode = GB_binop_flip (opcode, &handled) ; // for 1st and 2nd
373                 ASSERT (handled) ;      // FIRST and SECOND can be flipped
374             }
375             if (opcode == GB_FIRST_opcode)
376             {
377                 // t = A(i,k)
378                 ASSERT (B_is_pattern) ;
379                 #undef  GB_MULT
380                 #define GB_MULT(t, aik, bkj, i, k, j) memcpy (t, aik, csize)
381                 #include "GB_AxB_saxpy_template.c"
382             }
383             else // opcode == GB_SECOND_opcode
384             {
385                 // t = B(i,k)
386                 ASSERT (A_is_pattern) ;
387                 #undef  GB_MULT
388                 #define GB_MULT(t, aik, bkj, i, k, j) memcpy (t, bkj, csize)
389                 #include "GB_AxB_saxpy_template.c"
390             }
391         }
392         else
393         {
394             ASSERT (fmult != NULL) ;
395             if (flipxy)
396             {
397                 // t = B(k,j) * A(i,k)
398                 #undef  GB_MULT
399                 #define GB_MULT(t, aik, bkj, i, k, j) fmult (t, bkj, aik)
400                 #include "GB_AxB_saxpy_template.c"
401             }
402             else
403             {
404                 // t = A(i,k) * B(k,j)
405                 #undef  GB_MULT
406                 #define GB_MULT(t, aik, bkj, i, k, j) fmult (t, aik, bkj)
407                 #include "GB_AxB_saxpy_template.c"
408             }
409         }
410     }
411 
412     return (GrB_SUCCESS) ;
413 }
414 
415