1 //------------------------------------------------------------------------------
2 // GB_ewise: C<M> = accum (C, A+B) or A.*B
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // C<M> = accum (C,A+B), A.*B and variations. The input matrices A and B are
11 // optionally transposed. Does the work for GrB_eWiseAdd_* and
12 // GrB_eWiseMult_*. Handles all cases of the mask.
13
14 #include "GB_ewise.h"
15 #include "GB_add.h"
16 #include "GB_emult.h"
17 #include "GB_transpose.h"
18 #include "GB_accum_mask.h"
19 #include "GB_dense.h"
20
21 #define GB_FREE_ALL \
22 { \
23 GB_phbix_free (T) ; \
24 GB_phbix_free (AT) ; \
25 GB_phbix_free (BT) ; \
26 GB_phbix_free (MT) ; \
27 }
28
GB_ewise(GrB_Matrix C,const bool C_replace,const GrB_Matrix M,const bool Mask_comp,const bool Mask_struct,const GrB_BinaryOp accum,const GrB_BinaryOp op_in,const GrB_Matrix A,bool A_transpose,const GrB_Matrix B,bool B_transpose,bool eWiseAdd,GB_Context Context)29 GrB_Info GB_ewise // C<M> = accum (C, A+B) or A.*B
30 (
31 GrB_Matrix C, // input/output matrix for results
32 const bool C_replace, // if true, clear C before writing to it
33 const GrB_Matrix M, // optional mask for C, unused if NULL
34 const bool Mask_comp, // if true, complement the mask M
35 const bool Mask_struct, // if true, use the only structure of M
36 const GrB_BinaryOp accum, // optional accum for Z=accum(C,T)
37 const GrB_BinaryOp op_in, // defines '+' for C=A+B, or .* for A.*B
38 const GrB_Matrix A, // input matrix
39 bool A_transpose, // if true, use A' instead of A
40 const GrB_Matrix B, // input matrix
41 bool B_transpose, // if true, use B' instead of B
42 bool eWiseAdd, // if true, do set union (like A+B),
43 // otherwise do intersection (like A.*B)
44 GB_Context Context
45 )
46 {
47
48 //--------------------------------------------------------------------------
49 // check inputs
50 //--------------------------------------------------------------------------
51
52 // C may be aliased with M, A, and/or B
53
54 GrB_Info info ;
55
56 GrB_Matrix MT = NULL ;
57 struct GB_Matrix_opaque T_header, MT_header, AT_header, BT_header ;
58 GrB_Matrix T = GB_clear_static_header (&T_header) ;
59 GrB_Matrix AT = GB_clear_static_header (&AT_header) ;
60 GrB_Matrix BT = GB_clear_static_header (&BT_header) ;
61
62 GB_RETURN_IF_FAULTY_OR_POSITIONAL (accum) ;
63
64 ASSERT_MATRIX_OK (C, "C input for GB_ewise", GB0) ;
65 ASSERT_MATRIX_OK_OR_NULL (M, "M for GB_ewise", GB0) ;
66 ASSERT_BINARYOP_OK_OR_NULL (accum, "accum for GB_ewise", GB0) ;
67 ASSERT_BINARYOP_OK (op_in, "op for GB_ewise", GB0) ;
68 ASSERT_MATRIX_OK (A, "A for GB_ewise", GB0) ;
69 ASSERT_MATRIX_OK (B, "B for GB_ewise", GB0) ;
70
71 // T has the same type as the output z for z=op(a,b)
72 GrB_BinaryOp op = op_in ;
73 GrB_Type T_type = op->ztype ;
74
75 // check domains and dimensions for C<M> = accum (C,T)
76 GB_OK (GB_compatible (C->type, C, M, Mask_struct, accum, T_type, Context)) ;
77
78 // T=op(A,B) via op operator, so A and B must be compatible with z=op(a,b)
79 GB_OK (GB_BinaryOp_compatible (op, NULL, A->type, B->type,
80 GB_ignore_code, Context)) ;
81
82 if (eWiseAdd)
83 {
84 // C = A is done for entries in A but not C
85 if (!GB_Type_compatible (C->type, A->type))
86 {
87 GB_ERROR (GrB_DOMAIN_MISMATCH,
88 "First input of type [%s]\n"
89 "cannot be typecast to final output of type [%s]",
90 A->type->name, C->type->name) ;
91 }
92 // C = B is done for entries in B but not C
93 if (!GB_Type_compatible (C->type, B->type))
94 {
95 GB_ERROR (GrB_DOMAIN_MISMATCH,
96 "Second input of type [%s]\n"
97 "cannot be typecast to final output of type [%s]",
98 B->type->name, C->type->name) ;
99 }
100 }
101
102 // check the dimensions
103 int64_t anrows = (A_transpose) ? GB_NCOLS (A) : GB_NROWS (A) ;
104 int64_t ancols = (A_transpose) ? GB_NROWS (A) : GB_NCOLS (A) ;
105 int64_t bnrows = (B_transpose) ? GB_NCOLS (B) : GB_NROWS (B) ;
106 int64_t bncols = (B_transpose) ? GB_NROWS (B) : GB_NCOLS (B) ;
107 int64_t cnrows = GB_NROWS (C) ;
108 int64_t cncols = GB_NCOLS (C) ;
109 if (anrows != bnrows || ancols != bncols ||
110 cnrows != anrows || cncols != bncols)
111 {
112 GB_ERROR (GrB_DIMENSION_MISMATCH,
113 "Dimensions not compatible:\n"
114 "output is " GBd "-by-" GBd "\n"
115 "first input is " GBd "-by-" GBd "%s\n"
116 "second input is " GBd "-by-" GBd "%s",
117 cnrows, cncols,
118 anrows, ancols, A_transpose ? " (transposed)" : "",
119 bnrows, bncols, B_transpose ? " (transposed)" : "") ;
120 }
121
122 // quick return if an empty mask M is complemented
123 GB_RETURN_IF_QUICK_MASK (C, C_replace, M, Mask_comp, Mask_struct) ;
124
125 //--------------------------------------------------------------------------
126 // handle CSR and CSC formats
127 //--------------------------------------------------------------------------
128
129 GB_Opcode opcode = op->opcode ;
130 bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
131
132 // CSC/CSR format of T is same as C. Conform A and B to the format of C.
133 bool T_is_csc = C->is_csc ;
134 if (T_is_csc != A->is_csc)
135 {
136 // Flip the sense of A_transpose. For example, if C is CSC and A is
137 // CSR, and A_transpose is true, then C=A'+B is being computed. But
138 // this is the same as C=A+B where A is treated as if it is CSC.
139 A_transpose = !A_transpose ;
140 }
141
142 if (T_is_csc != B->is_csc)
143 {
144 // Flip the sense of B_transpose.
145 B_transpose = !B_transpose ;
146 }
147
148 if (A_transpose && B_transpose)
149 {
150 // T=A'+B' is not computed. Instead, T=A+B is computed first,
151 // and then C = T' is computed.
152 A_transpose = false ;
153 B_transpose = false ;
154 // The CSC format of T and C now differ.
155 T_is_csc = !T_is_csc ;
156 }
157
158 if (!T_is_csc)
159 {
160 if (op_is_positional)
161 {
162 // positional ops must be flipped, with i and j swapped
163 op = GB_positional_binop_ijflip (op) ;
164 opcode = op->opcode ;
165 }
166 }
167
168 //--------------------------------------------------------------------------
169 // decide when to apply the mask
170 //--------------------------------------------------------------------------
171
172 // GB_add and GB_emult can apply any non-complemented mask, but it is
173 // faster to exploit the mask in GB_add / GB_emult only when it is very
174 // sparse compared with A and B, or (in special cases) when it is easy
175 // to apply.
176
177 // check the CSR/CSC format of M
178 bool M_is_csc = (M == NULL) ? T_is_csc : M->is_csc ;
179
180 //--------------------------------------------------------------------------
181 // transpose M if needed
182 //--------------------------------------------------------------------------
183
184 GrB_Matrix M1 = M ;
185 bool M_transpose = (T_is_csc != M_is_csc) ;
186 if (M_transpose)
187 {
188 // MT = M'
189 // transpose: no typecast, no op, not in-place
190 GBURBLE ("(M transpose) ") ;
191 MT = GB_clear_static_header (&MT_header) ;
192 GB_OK (GB_transpose (&MT, GrB_BOOL, T_is_csc, M, // MT static
193 NULL, NULL, NULL, false, Context)) ;
194 M1 = MT ;
195 }
196
197 //--------------------------------------------------------------------------
198 // transpose A if needed
199 //--------------------------------------------------------------------------
200
201 GrB_Matrix A1 = A ;
202 if (A_transpose)
203 {
204 // AT = A'
205 // transpose: no typecast, no op, not in-place
206 GBURBLE ("(A transpose) ") ;
207 GB_OK (GB_transpose (&AT, NULL, T_is_csc, A, // AT static
208 NULL, NULL, NULL, false, Context)) ;
209 A1 = AT ;
210 ASSERT_MATRIX_OK (AT, "AT from transpose", GB0) ;
211 }
212
213 //--------------------------------------------------------------------------
214 // transpose B if needed
215 //--------------------------------------------------------------------------
216
217 GrB_Matrix B1 = B ;
218 if (B_transpose)
219 {
220 // BT = B'
221 // transpose: no typecast, no op, not in-place
222 GBURBLE ("(B transpose) ") ;
223 GB_OK (GB_transpose (&BT, NULL, T_is_csc, B, // BT static
224 NULL, NULL, NULL, false, Context)) ;
225 B1 = BT ;
226 }
227
228 //--------------------------------------------------------------------------
229 // special cases
230 //--------------------------------------------------------------------------
231
232 // FUTURE::: handle more special cases:
233 // C<M>+=A+B when C and A are dense, B is sparse. M can be sparse.
234 // C<M>+=A+B when C and B are dense, A is sparse. M can be sparse.
235 // C<M>+=A+B when C, A, and B are dense. M can be sparse.
236 // In all cases above, C remains dense and can be updated in-place
237 // C_replace must be false. M can be valued or structural.
238
239 #ifndef GBCOMPACT
240
241 bool C_is_dense = GB_is_dense (C) && !GB_PENDING_OR_ZOMBIES (C) ;
242 bool A_is_dense = GB_is_dense (A1) ;
243 bool B_is_dense = GB_is_dense (B1) ;
244
245 bool no_typecast =
246 (op->ztype == C->type) // no typecasting of C
247 && (op->xtype == A1->type) // no typecasting of A
248 && (op->ytype == B1->type) ; // no typecasting of B
249
250 bool any_bitmap =
251 GB_IS_BITMAP (C) ||
252 GB_IS_BITMAP (M) ||
253 GB_IS_BITMAP (A) ||
254 GB_IS_BITMAP (B) ;
255
256 bool any_pending_work =
257 GB_ANY_PENDING_WORK (M1) ||
258 GB_ANY_PENDING_WORK (A1) ||
259 GB_ANY_PENDING_WORK (B1) ;
260
261 // FUTURE: for sssp12:
262 // C<A> = A+B where C is sparse and B is dense;
263 // mask is structural, not complemented, C_replace is false.
264 // C is not empty. Use a kernel that computes T<A>=A+B
265 // where T starts out empty; just iterate over the entries in A.
266
267 if (A_is_dense // A and B are dense
268 && B_is_dense
269 && (M == NULL) && !Mask_comp // no mask
270 && (C->is_csc == T_is_csc) // no transpose of C
271 && no_typecast // no typecasting
272 && (opcode < GB_USER_opcode) // not a user-defined operator
273 && (!op_is_positional) // op is not positional
274 && !any_bitmap
275 && !any_pending_work) // no matrix has pending work
276 {
277
278 if (C_is_dense // C is dense
279 && accum == op // accum is same as the op
280 && (opcode >= GB_MIN_opcode) // subset of binary operators
281 && (opcode <= GB_RDIV_opcode))
282 {
283
284 //------------------------------------------------------------------
285 // C += A+B where all 3 matrices are dense
286 //------------------------------------------------------------------
287
288 // C_replace is ignored
289 GBURBLE ("dense C+=A+B ") ;
290 GB_dense_ewise3_accum (C, A1, B1, op, Context) ; // cannot fail
291 GB_FREE_ALL ;
292 ASSERT_MATRIX_OK (C, "C output for GB_ewise, dense C+=A+B", GB0) ;
293 return (GrB_SUCCESS) ;
294
295 }
296 else if (accum == NULL) // no accum
297 {
298
299 //------------------------------------------------------------------
300 // C = A+B where A and B are dense (C is anything)
301 //------------------------------------------------------------------
302
303 // C_replace is ignored
304 GBURBLE ("dense C=A+B ") ;
305 info = GB_dense_ewise3_noaccum (C, C_is_dense, A1, B1, op, Context);
306 GB_FREE_ALL ;
307 if (info == GrB_SUCCESS)
308 {
309 ASSERT_MATRIX_OK (C, "C output for GB_ewise, dense C=A+B", GB0);
310 }
311 return (info) ;
312 }
313 }
314
315 #endif
316
317 //--------------------------------------------------------------------------
318 // T = A+B or A.*B, or with any mask M
319 //--------------------------------------------------------------------------
320
321 bool mask_applied = false ;
322
323 if (eWiseAdd)
324 {
325
326 //----------------------------------------------------------------------
327 // T<any mask> = A+B
328 //----------------------------------------------------------------------
329
330 // TODO: check the mask condition in GB_add_sparsity.
331 // Only exploit the mask in GB_add if it's more efficient than
332 // exploiting it later, probably this condition:
333
334 // (accum == NULL) && (C->is_csc == T->is_csc)
335 // && (C_replace || GB_NNZ_UPPER_BOUND (C) == 0))
336
337 // If that is true and the mask is applied, then T is transplanted as
338 // the final C and the mask is no longer needed. In this case, it
339 // could be faster to exploit the mask duing GB_add.
340
341 GB_OK (GB_add (T, T_type, T_is_csc, M1, Mask_struct, Mask_comp,
342 &mask_applied, A1, B1, op, Context)) ;
343
344 }
345 else
346 {
347
348 //----------------------------------------------------------------------
349 // T<any mask> = A.*B
350 //----------------------------------------------------------------------
351
352 // T can be returned with shallow components derived from its inputs A1
353 // and/or B1. In particular, if T is hypersparse, T->h may be a
354 // shallow copy of A1->h, B1->h, or M1->h. T is hypersparse if any
355 // matrix A1, B1, or M1 are hypersparse. Internally, T->h always
356 // starts as a shallow copy of A1->h, B1->h, or M1->h, but it may be
357 // pruned by GB_hypermatrix_prune, and thus no longer shallow.
358
359 GB_OK (GB_emult (T, T_type, T_is_csc, M1, Mask_struct, Mask_comp,
360 &mask_applied, A1, B1, op, Context)) ;
361
362 //----------------------------------------------------------------------
363 // transplant shallow content from AT, BT, or MT
364 //----------------------------------------------------------------------
365
366 // If T is hypersparse, T->h is always a shallow copy of A1->h, B1->h,
367 // or M1->h. Any of the three matrices A1, B1, or M1 may be temporary
368 // transposes, AT, BT, and MT respectively. If T->h is a shallow cpoy
369 // of a temporary matrix, then flip the ownership of the T->h array,
370 // from the temporary matrix into T, so that T->h is not freed when AT,
371 // BT, and MT are freed.
372
373 // GB_tranpose can return all kinds of shallow components, particularly
374 // when transposing vectors. It can return AT->h as shallow copy of
375 // A->i, for example.
376
377 if (T->h_shallow)
378 {
379 // T->h is shallow and T is hypersparse
380 ASSERT (GB_IS_HYPERSPARSE (T)) ;
381
382 // one of A1, B1, or M1 is hypersparse
383 ASSERT (GB_IS_HYPERSPARSE (A1) || GB_IS_HYPERSPARSE (B1) ||
384 GB_IS_HYPERSPARSE (M1))
385 if (A_transpose && T->h == A1->h)
386 {
387 // A1 is the temporary matrix AT. AT->h might itself be a
388 // shallow copy of A->h or A->i, from GB_transpose.
389 ASSERT (A1 == AT) ;
390 T->h_shallow = AT->h_shallow ;
391 AT->h_shallow = true ;
392 }
393 else if (B_transpose && T->h == B1->h)
394 {
395 // B1 is the temporary matrix BT. BT->h might itself be a
396 // shallow copy of B->h or B->i, from GB_transpose.
397 ASSERT (B1 == BT) ;
398 T->h_shallow = BT->h_shallow ;
399 BT->h_shallow = true ;
400 }
401 else if (M_transpose && T->h == M1->h)
402 {
403 // M1 is the temporary matrix MT. MT->h might itself be a
404 // shallow copy of M->h or M->i, from GB_transpose.
405 ASSERT (M1 == MT) ;
406 T->h_shallow = MT->h_shallow ;
407 MT->h_shallow = true ;
408 }
409
410 // T->h may still be shallow, but if so, it is a shallow copy of
411 // some component of the user input matrices A, B, or M, and must
412 // remain shallow. A deep copy of it will be made when T->h is
413 // transplanted into the result C.
414 ASSERT (GB_IMPLIES (T->h_shallow,
415 (T->h == A1->h || T->h == B1->h ||
416 (M1 != NULL && T->h == M1->h)))) ;
417 }
418 }
419
420 //--------------------------------------------------------------------------
421 // free the transposed matrices
422 //--------------------------------------------------------------------------
423
424 GB_phbix_free (AT) ;
425 GB_phbix_free (BT) ;
426
427 //--------------------------------------------------------------------------
428 // C<M> = accum (C,T): accumulate the results into C via the mask
429 //--------------------------------------------------------------------------
430
431 ASSERT_MATRIX_OK (T, "T from GB_ewise, prior to C<M>=accum(C,T)", GB0) ;
432
433 if ((accum == NULL) && (C->is_csc == T->is_csc)
434 && (M == NULL || (M != NULL && mask_applied))
435 && (C_replace || GB_NNZ_UPPER_BOUND (C) == 0))
436 {
437 // C = 0 ; C = (ctype) T ; with the same CSR/CSC format. The mask M
438 // (if any) has already been applied. If C is also empty, or to be
439 // cleared anyway, and if accum is not present, then T can be
440 // transplanted directly into C, as C = (ctype) T, typecasting if
441 // needed. If no typecasting is done then this takes no time at all
442 // and is a pure transplant. Also conform C to its desired
443 // hypersparsity.
444 GB_phbix_free (MT) ;
445 GB_OK (GB_transplant_conform (C, C->type, &T, Context)) ;
446 return (GB_block (C, Context)) ;
447 }
448 else
449 {
450 // C<M> = accum (C,T)
451 // GB_accum_mask also conforms C to its desired hypersparsity
452 info = GB_accum_mask (C, M, MT, accum, &T, C_replace, Mask_comp,
453 Mask_struct, Context) ;
454 GB_phbix_free (MT) ;
455 return (info) ;
456 }
457 }
458
459