1 //------------------------------------------------------------------------------
2 // GB_AxB_saxpy_generic: compute C=A*B, C<M>=A*B, or C<!M>=A*B in parallel
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // GB_AxB_saxpy_generic computes C=A*B, C<M>=A*B, or C<!M>=A*B in parallel,
11 // with arbitrary types and operators. C can have any sparsity pattern:
12 // hyper, sparse, bitmap, or full. For all cases, the four matrices C, M
13 // (if present), A, and B have the same format (by-row or by-column), or they
14 // represent implicitly transposed matrices with the same effect. This method
15 // does not handle the dot-product methods, which compute C=A'*B if A and B
16 // are held by column, or equivalently A*B' if both are held by row.
17
18 // This method uses GB_AxB_saxpy_template.c to implement two meta-methods,
19 // each of which can contain further specialized methods (such as the fine/
20 // coarse x Gustavson/Hash, mask/no-mask methods in saxpy3):
21
22 // saxpy3: general purpose method, where C is sparse or hypersparse,
23 // via GB_AxB_saxpy3_template.c. SaxpyTasks holds the (fine/coarse x
24 // Gustavson/Hash) tasks constructed by GB_AxB_saxpy3_slice*.
25
26 // bitmap_saxpy: general purpose method, where C is bitmap or full, via
27 // GB_bitmap_AxB_saxpy_template.c. The method constructs its own
28 // tasks in workspace defined and freed in that template.
29
30 //------------------------------------------------------------------------------
31
32 #include "GB_mxm.h"
33 #include "GB_ek_slice.h"
34 #include "GB_binop.h"
35 #include "GB_bracket.h"
36 #include "GB_sort.h"
37 #include "GB_atomics.h"
38 #include "GB_ek_slice_search.c"
39 #include "GB_bitmap_assign_methods.h"
40
GB_AxB_saxpy_generic(GrB_Matrix C,const GrB_Matrix M,bool Mask_comp,const bool Mask_struct,const bool M_packed_in_place,const GrB_Matrix A,bool A_is_pattern,const GrB_Matrix B,bool B_is_pattern,const GrB_Semiring semiring,const bool flipxy,const int saxpy_method,GB_saxpy3task_struct * restrict SaxpyTasks,int ntasks,int nfine,int nthreads,const int do_sort,GB_Context Context)41 GrB_Info GB_AxB_saxpy_generic
42 (
43 GrB_Matrix C, // any sparsity
44 const GrB_Matrix M,
45 bool Mask_comp,
46 const bool Mask_struct,
47 const bool M_packed_in_place, // ignored if C is bitmap
48 const GrB_Matrix A,
49 bool A_is_pattern,
50 const GrB_Matrix B,
51 bool B_is_pattern,
52 const GrB_Semiring semiring, // semiring that defines C=A*B
53 const bool flipxy, // if true, do z=fmult(b,a) vs fmult(a,b)
54 const int saxpy_method, // saxpy3 or bitmap method
55 // for saxpy3 only:
56 GB_saxpy3task_struct *restrict SaxpyTasks, // NULL if C is bitmap
57 int ntasks,
58 int nfine,
59 int nthreads,
60 const int do_sort, // if true, sort in saxpy3
61 GB_Context Context
62 )
63 {
64
65 //--------------------------------------------------------------------------
66 // get operators, functions, workspace, contents of A, B, and C
67 //--------------------------------------------------------------------------
68
69 GrB_BinaryOp mult = semiring->multiply ;
70 GrB_Monoid add = semiring->add ;
71 GB_void *identity = (GB_void *) add->identity ;
72 ASSERT (mult->ztype == add->op->ztype) ;
73 ASSERT (mult->ztype == C->type) ;
74
75 GxB_binary_function fmult = mult->function ; // NULL if positional
76 GxB_binary_function fadd = add->op->function ;
77 GB_Opcode opcode = mult->opcode ;
78 bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
79
80 size_t csize = C->type->size ;
81 size_t asize = A_is_pattern ? 0 : A->type->size ;
82 size_t bsize = B_is_pattern ? 0 : B->type->size ;
83
84 size_t xsize = mult->xtype->size ;
85 size_t ysize = mult->ytype->size ;
86
87 // scalar workspace: because of typecasting, the x/y types need not
88 // be the same as the size of the A and B types.
89 // flipxy false: aik = (xtype) A(i,k) and bkj = (ytype) B(k,j)
90 // flipxy true: aik = (ytype) A(i,k) and bkj = (xtype) B(k,j)
91 size_t aik_size = flipxy ? ysize : xsize ;
92 size_t bkj_size = flipxy ? xsize : ysize ;
93
94 GB_cast_function cast_A, cast_B ;
95 if (flipxy)
96 {
97 // A is typecasted to y, and B is typecasted to x
98 cast_A = A_is_pattern ? NULL :
99 GB_cast_factory (mult->ytype->code, A->type->code) ;
100 cast_B = B_is_pattern ? NULL :
101 GB_cast_factory (mult->xtype->code, B->type->code) ;
102 }
103 else
104 {
105 // A is typecasted to x, and B is typecasted to y
106 cast_A = A_is_pattern ? NULL :
107 GB_cast_factory (mult->xtype->code, A->type->code) ;
108 cast_B = B_is_pattern ? NULL :
109 GB_cast_factory (mult->ytype->code, B->type->code) ;
110 }
111
112 //--------------------------------------------------------------------------
113 // C = A*B via saxpy3 or bitmap method, function pointers, and typecasting
114 //--------------------------------------------------------------------------
115
116 // memcpy (&(Cx [pC]), &(Hx [i]), len*csize)
117 #define GB_CIJ_MEMCPY(pC,i,len) memcpy (GB_CX (pC), GB_HX (i), (len)*csize)
118
119 // atomic update not available for function pointers
120 #define GB_HAS_ATOMIC 0
121
122 // user-defined monoid update cannot be done with an OpenMP atomic
123 #define GB_HAS_OMP_ATOMIC 0
124
125 // no special cases
126 #define GB_IS_ANY_MONOID 0
127 #define GB_IS_ANY_FC32_MONOID 0
128 #define GB_IS_ANY_FC64_MONOID 0
129 #define GB_IS_PLUS_FC32_MONOID 0
130 #define GB_IS_PLUS_FC64_MONOID 0
131 #define GB_IS_ANY_PAIR_SEMIRING 0
132 #define GB_IS_PAIR_MULTIPLIER 0
133
134 // a generic semiring does not have a concise bitmap multiply-add statement
135 #define GB_HAS_BITMAP_MULTADD 0
136 #define GB_XINIT ;
137 #define GB_XLOAD(bkj) ;
138
139 #define GB_ATYPE GB_void
140 #define GB_BTYPE GB_void
141 #define GB_ASIZE asize
142 #define GB_BSIZE bsize
143
144 // no vectorization
145 #define GB_PRAGMA_SIMD_VECTORIZE ;
146
147 // The monoid identity byte value is not used in saxpy3
148 #define GB_GENERIC
149 #define GB_HAS_IDENTITY_BYTE 0
150 #define GB_IDENTITY_BYTE (none)
151
152 // definitions for GB_AxB_saxpy_template.c
153 #include "GB_AxB_saxpy3_template.h"
154
155 if (op_is_positional)
156 {
157
158 //----------------------------------------------------------------------
159 // generic semirings with positional mulitiply operators
160 //----------------------------------------------------------------------
161
162 GB_BURBLE_MATRIX (C, "(generic positional C=A*B) ") ;
163
164 if (flipxy)
165 {
166 // flip a positional multiplicative operator
167 bool handled ;
168 opcode = GB_binop_flip (opcode, &handled) ; // for positional ops
169 ASSERT (handled) ; // all positional ops can be flipped
170 }
171
172 // C always has type int64_t or int32_t. The monoid must be used via
173 // its function pointer. The positional multiply operator must be
174 // hard-coded since it has no function pointer. The numerical values
175 // and types of A and B are not accessed.
176
177 ASSERT (A_is_pattern) ;
178 ASSERT (B_is_pattern) ;
179
180 // aik = A(i,k), located in Ax [pA], value not used
181 #define GB_GETA(aik,Ax,pA) ;
182
183 // bkj = B(k,j), located in Bx [pB], value not used
184 #define GB_GETB(bkj,Bx,pB) ;
185
186 // Gx [pG] = A(i,k), located in Ax [pA], value not used
187 #define GB_LOADA(Gx,pG,Ax,pA) ;
188
189 // Gx [pG] = B(k,j), located in Bx [pB], value not used
190 #define GB_LOADB(Gx,pG,Bx,pB) ;
191
192 // define t for each task
193 #define GB_CIJ_DECLARE(t) GB_CTYPE t
194
195 // address of Cx [p]
196 #define GB_CX(p) (&Cx [p])
197
198 // Cx [p] = t
199 #define GB_CIJ_WRITE(p,t) Cx [p] = t
200
201 // address of Hx [i]
202 #define GB_HX(i) (&Hx [i])
203
204 // Hx [i] = t
205 #define GB_HX_WRITE(i,t) Hx [i] = t
206
207 // Hx [i] = identity
208 #define GB_HX_CLEAR(i) memcpy (GB_HX (i), identity, csize)
209
210 // Cx [p] = Hx [i]
211 #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
212
213 // Cx [p] += Hx [i]
214 #define GB_CIJ_GATHER_UPDATE(p,i) fadd (GB_CX (p), GB_CX (p), GB_HX (i))
215
216 // Cx [p] += t
217 #define GB_CIJ_UPDATE(p,t) fadd (GB_CX (p), GB_CX (p), &t)
218
219 // Hx [i] += t
220 #define GB_HX_UPDATE(i,t) fadd (GB_HX (i), GB_HX (i), &t)
221
222 int64_t offset = GB_positional_offset (opcode) ;
223
224 if (mult->ztype == GrB_INT64)
225 {
226 #undef GB_CTYPE
227 #define GB_CTYPE int64_t
228 #undef GB_CSIZE
229 #define GB_CSIZE (sizeof (int64_t))
230 ASSERT (C->type == GrB_INT64) ;
231 ASSERT (csize == sizeof (int64_t)) ;
232 switch (opcode)
233 {
234 case GB_FIRSTI_opcode : // z = first_i(A(i,k),y) == i
235 case GB_FIRSTI1_opcode : // z = first_i1(A(i,k),y) == i+1
236 #undef GB_MULT
237 #define GB_MULT(t, aik, bkj, i, k, j) t = i + offset
238 #include "GB_AxB_saxpy_template.c"
239 break ;
240 case GB_FIRSTJ_opcode : // z = first_j(A(i,k),y) == k
241 case GB_FIRSTJ1_opcode : // z = first_j1(A(i,k),y) == k+1
242 case GB_SECONDI_opcode : // z = second_i(x,B(k,j)) == k
243 case GB_SECONDI1_opcode : // z = second_i1(x,B(k,j)) == k+1
244 #undef GB_MULT
245 #define GB_MULT(t, aik, bkj, i, k, j) t = k + offset
246 #include "GB_AxB_saxpy_template.c"
247 break ;
248 case GB_SECONDJ_opcode : // z = second_j(x,B(k,j)) == j
249 case GB_SECONDJ1_opcode : // z = second_j1(x,B(k,j)) == j+1
250 #undef GB_MULT
251 #define GB_MULT(t, aik, bkj, i, k, j) t = j + offset
252 #include "GB_AxB_saxpy_template.c"
253 break ;
254 default: ;
255 }
256 }
257 else
258 {
259 #undef GB_CTYPE
260 #define GB_CTYPE int32_t
261 #undef GB_CSIZE
262 #define GB_CSIZE (sizeof (int32_t))
263 ASSERT (C->type == GrB_INT32) ;
264 ASSERT (csize == sizeof (int32_t)) ;
265 switch (opcode)
266 {
267 case GB_FIRSTI_opcode : // z = first_i(A(i,k),y) == i
268 case GB_FIRSTI1_opcode : // z = first_i1(A(i,k),y) == i+1
269 #undef GB_MULT
270 #define GB_MULT(t,aik,bkj,i,k,j) t = (int32_t) (i + offset)
271 #include "GB_AxB_saxpy_template.c"
272 break ;
273 case GB_FIRSTJ_opcode : // z = first_j(A(i,k),y) == k
274 case GB_FIRSTJ1_opcode : // z = first_j1(A(i,k),y) == k+1
275 case GB_SECONDI_opcode : // z = second_i(x,B(k,j)) == k
276 case GB_SECONDI1_opcode : // z = second_i1(x,B(k,j)) == k+1
277 #undef GB_MULT
278 #define GB_MULT(t,aik,bkj,i,k,j) t = (int32_t) (k + offset)
279 #include "GB_AxB_saxpy_template.c"
280 break ;
281 case GB_SECONDJ_opcode : // z = second_j(x,B(k,j)) == j
282 case GB_SECONDJ1_opcode : // z = second_j1(x,B(k,j)) == j+1
283 #undef GB_MULT
284 #define GB_MULT(t,aik,bkj,i,k,j) t = (int32_t) (j + offset)
285 #include "GB_AxB_saxpy_template.c"
286 break ;
287 default: ;
288 }
289 }
290
291 }
292 else
293 {
294
295 //----------------------------------------------------------------------
296 // generic semirings with standard multiply operators
297 //----------------------------------------------------------------------
298
299 GB_BURBLE_MATRIX (C, "(generic C=A*B) ") ;
300
301 // aik = A(i,k), located in Ax [pA]
302 #undef GB_GETA
303 #define GB_GETA(aik,Ax,pA) \
304 GB_void aik [GB_VLA(aik_size)] ; \
305 if (!A_is_pattern) cast_A (aik, Ax +((pA)*asize), asize)
306
307 // bkj = B(k,j), located in Bx [pB]
308 #undef GB_GETB
309 #define GB_GETB(bkj,Bx,pB) \
310 GB_void bkj [GB_VLA(bkj_size)] ; \
311 if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize)
312
313 // Gx [pG] = A(i,k), located in Ax [pA], no typecasting
314 #undef GB_LOADA
315 #define GB_LOADA(Gx,pG,Ax,pA) \
316 memcpy (Gx + ((pG)*asize), Ax +((pA)*asize), asize)
317
318 // Gx [pG] = B(k,j), located in Bx [pB], no typecasting
319 #undef GB_LOADB
320 #define GB_LOADB(Gx,pG,Bx,pB) \
321 memcpy (Gx + ((pG)*bsize), Bx +((pB)*bsize), bsize)
322
323 // define t for each task
324 #undef GB_CIJ_DECLARE
325 #define GB_CIJ_DECLARE(t) GB_void t [GB_VLA(csize)]
326
327 // address of Cx [p]
328 #undef GB_CX
329 #define GB_CX(p) (Cx +((p)*csize))
330
331 // Cx [p] = t
332 #undef GB_CIJ_WRITE
333 #define GB_CIJ_WRITE(p,t) memcpy (GB_CX (p), t, csize)
334
335 // address of Hx [i]
336 #undef GB_HX
337 #define GB_HX(i) (Hx +((i)*csize))
338
339 // Hx [i] = t
340 #undef GB_HX_WRITE
341 #define GB_HX_WRITE(i,t) memcpy (GB_HX (i), t, csize)
342
343 // Cx [p] = Hx [i]
344 #undef GB_CIJ_GATHER
345 #define GB_CIJ_GATHER(p,i) memcpy (GB_CX (p), GB_HX(i), csize)
346
347 // Cx [p] += Hx [i]
348 #undef GB_CIJ_GATHER_UPDATE
349 #define GB_CIJ_GATHER_UPDATE(p,i) fadd (GB_CX (p), GB_CX (p), GB_HX (i))
350
351 // Cx [p] += t
352 #undef GB_CIJ_UPDATE
353 #define GB_CIJ_UPDATE(p,t) fadd (GB_CX (p), GB_CX (p), t)
354
355 // Hx [i] += t
356 #undef GB_HX_UPDATE
357 #define GB_HX_UPDATE(i,t) fadd (GB_HX (i), GB_HX (i), t)
358
359 #undef GB_CTYPE
360 #define GB_CTYPE GB_void
361
362 #undef GB_CSIZE
363 #define GB_CSIZE csize
364
365 if (opcode == GB_FIRST_opcode || opcode == GB_SECOND_opcode)
366 {
367 // fmult is not used and can be NULL (for user-defined types)
368 if (flipxy)
369 {
370 // flip first and second
371 bool handled ;
372 opcode = GB_binop_flip (opcode, &handled) ; // for 1st and 2nd
373 ASSERT (handled) ; // FIRST and SECOND can be flipped
374 }
375 if (opcode == GB_FIRST_opcode)
376 {
377 // t = A(i,k)
378 ASSERT (B_is_pattern) ;
379 #undef GB_MULT
380 #define GB_MULT(t, aik, bkj, i, k, j) memcpy (t, aik, csize)
381 #include "GB_AxB_saxpy_template.c"
382 }
383 else // opcode == GB_SECOND_opcode
384 {
385 // t = B(i,k)
386 ASSERT (A_is_pattern) ;
387 #undef GB_MULT
388 #define GB_MULT(t, aik, bkj, i, k, j) memcpy (t, bkj, csize)
389 #include "GB_AxB_saxpy_template.c"
390 }
391 }
392 else
393 {
394 ASSERT (fmult != NULL) ;
395 if (flipxy)
396 {
397 // t = B(k,j) * A(i,k)
398 #undef GB_MULT
399 #define GB_MULT(t, aik, bkj, i, k, j) fmult (t, bkj, aik)
400 #include "GB_AxB_saxpy_template.c"
401 }
402 else
403 {
404 // t = A(i,k) * B(k,j)
405 #undef GB_MULT
406 #define GB_MULT(t, aik, bkj, i, k, j) fmult (t, aik, bkj)
407 #include "GB_AxB_saxpy_template.c"
408 }
409 }
410 }
411
412 return (GrB_SUCCESS) ;
413 }
414
415