1 //------------------------------------------------------------------------------
2 // GB_ewise: C<M> = accum (C, A+B) or A.*B
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // C<M> = accum (C,A+B), A.*B and variations.  The input matrices A and B are
11 // optionally transposed.  Does the work for GrB_eWiseAdd_* and
12 // GrB_eWiseMult_*.  Handles all cases of the mask.
13 
14 #include "GB_ewise.h"
15 #include "GB_add.h"
16 #include "GB_emult.h"
17 #include "GB_transpose.h"
18 #include "GB_accum_mask.h"
19 #include "GB_dense.h"
20 
21 #define GB_FREE_ALL         \
22 {                           \
23     GB_phbix_free (T) ;     \
24     GB_phbix_free (AT) ;    \
25     GB_phbix_free (BT) ;    \
26     GB_phbix_free (MT) ;    \
27 }
28 
GB_ewise(GrB_Matrix C,const bool C_replace,const GrB_Matrix M,const bool Mask_comp,const bool Mask_struct,const GrB_BinaryOp accum,const GrB_BinaryOp op_in,const GrB_Matrix A,bool A_transpose,const GrB_Matrix B,bool B_transpose,bool eWiseAdd,GB_Context Context)29 GrB_Info GB_ewise                   // C<M> = accum (C, A+B) or A.*B
30 (
31     GrB_Matrix C,                   // input/output matrix for results
32     const bool C_replace,           // if true, clear C before writing to it
33     const GrB_Matrix M,             // optional mask for C, unused if NULL
34     const bool Mask_comp,           // if true, complement the mask M
35     const bool Mask_struct,         // if true, use the only structure of M
36     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
37     const GrB_BinaryOp op_in,       // defines '+' for C=A+B, or .* for A.*B
38     const GrB_Matrix A,             // input matrix
39     bool A_transpose,               // if true, use A' instead of A
40     const GrB_Matrix B,             // input matrix
41     bool B_transpose,               // if true, use B' instead of B
42     bool eWiseAdd,                  // if true, do set union (like A+B),
43                                     // otherwise do intersection (like A.*B)
44     GB_Context Context
45 )
46 {
47 
48     //--------------------------------------------------------------------------
49     // check inputs
50     //--------------------------------------------------------------------------
51 
52     // C may be aliased with M, A, and/or B
53 
54     GrB_Info info ;
55 
56     GrB_Matrix MT = NULL ;
57     struct GB_Matrix_opaque T_header, MT_header, AT_header, BT_header ;
58     GrB_Matrix T  = GB_clear_static_header (&T_header) ;
59     GrB_Matrix AT = GB_clear_static_header (&AT_header) ;
60     GrB_Matrix BT = GB_clear_static_header (&BT_header) ;
61 
62     GB_RETURN_IF_FAULTY_OR_POSITIONAL (accum) ;
63 
64     ASSERT_MATRIX_OK (C, "C input for GB_ewise", GB0) ;
65     ASSERT_MATRIX_OK_OR_NULL (M, "M for GB_ewise", GB0) ;
66     ASSERT_BINARYOP_OK_OR_NULL (accum, "accum for GB_ewise", GB0) ;
67     ASSERT_BINARYOP_OK (op_in, "op for GB_ewise", GB0) ;
68     ASSERT_MATRIX_OK (A, "A for GB_ewise", GB0) ;
69     ASSERT_MATRIX_OK (B, "B for GB_ewise", GB0) ;
70 
71     // T has the same type as the output z for z=op(a,b)
72     GrB_BinaryOp op = op_in ;
73     GrB_Type T_type = op->ztype ;
74 
75     // check domains and dimensions for C<M> = accum (C,T)
76     GB_OK (GB_compatible (C->type, C, M, Mask_struct, accum, T_type, Context)) ;
77 
78     // T=op(A,B) via op operator, so A and B must be compatible with z=op(a,b)
79     GB_OK (GB_BinaryOp_compatible (op, NULL, A->type, B->type,
80         GB_ignore_code, Context)) ;
81 
82     if (eWiseAdd)
83     {
84         // C = A is done for entries in A but not C
85         if (!GB_Type_compatible (C->type, A->type))
86         {
87             GB_ERROR (GrB_DOMAIN_MISMATCH,
88                 "First input of type [%s]\n"
89                 "cannot be typecast to final output of type [%s]",
90                 A->type->name, C->type->name) ;
91         }
92         // C = B is done for entries in B but not C
93         if (!GB_Type_compatible (C->type, B->type))
94         {
95             GB_ERROR (GrB_DOMAIN_MISMATCH,
96                 "Second input of type [%s]\n"
97                 "cannot be typecast to final output of type [%s]",
98                 B->type->name, C->type->name) ;
99         }
100     }
101 
102     // check the dimensions
103     int64_t anrows = (A_transpose) ? GB_NCOLS (A) : GB_NROWS (A) ;
104     int64_t ancols = (A_transpose) ? GB_NROWS (A) : GB_NCOLS (A) ;
105     int64_t bnrows = (B_transpose) ? GB_NCOLS (B) : GB_NROWS (B) ;
106     int64_t bncols = (B_transpose) ? GB_NROWS (B) : GB_NCOLS (B) ;
107     int64_t cnrows = GB_NROWS (C) ;
108     int64_t cncols = GB_NCOLS (C) ;
109     if (anrows != bnrows || ancols != bncols ||
110         cnrows != anrows || cncols != bncols)
111     {
112         GB_ERROR (GrB_DIMENSION_MISMATCH,
113             "Dimensions not compatible:\n"
114             "output is " GBd "-by-" GBd "\n"
115             "first input is " GBd "-by-" GBd "%s\n"
116             "second input is " GBd "-by-" GBd "%s",
117             cnrows, cncols,
118             anrows, ancols, A_transpose ? " (transposed)" : "",
119             bnrows, bncols, B_transpose ? " (transposed)" : "") ;
120     }
121 
122     // quick return if an empty mask M is complemented
123     GB_RETURN_IF_QUICK_MASK (C, C_replace, M, Mask_comp, Mask_struct) ;
124 
125     //--------------------------------------------------------------------------
126     // handle CSR and CSC formats
127     //--------------------------------------------------------------------------
128 
129     GB_Opcode opcode = op->opcode ;
130     bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
131 
132     // CSC/CSR format of T is same as C.  Conform A and B to the format of C.
133     bool T_is_csc = C->is_csc ;
134     if (T_is_csc != A->is_csc)
135     {
136         // Flip the sense of A_transpose.  For example, if C is CSC and A is
137         // CSR, and A_transpose is true, then C=A'+B is being computed.  But
138         // this is the same as C=A+B where A is treated as if it is CSC.
139         A_transpose = !A_transpose ;
140     }
141 
142     if (T_is_csc != B->is_csc)
143     {
144         // Flip the sense of B_transpose.
145         B_transpose = !B_transpose ;
146     }
147 
148     if (A_transpose && B_transpose)
149     {
150         // T=A'+B' is not computed.  Instead, T=A+B is computed first,
151         // and then C = T' is computed.
152         A_transpose = false ;
153         B_transpose = false ;
154         // The CSC format of T and C now differ.
155         T_is_csc = !T_is_csc ;
156     }
157 
158     if (!T_is_csc)
159     {
160         if (op_is_positional)
161         {
162             // positional ops must be flipped, with i and j swapped
163             op = GB_positional_binop_ijflip (op) ;
164             opcode = op->opcode ;
165         }
166     }
167 
168     //--------------------------------------------------------------------------
169     // decide when to apply the mask
170     //--------------------------------------------------------------------------
171 
172     // GB_add and GB_emult can apply any non-complemented mask, but it is
173     // faster to exploit the mask in GB_add / GB_emult only when it is very
174     // sparse compared with A and B, or (in special cases) when it is easy
175     // to apply.
176 
177     // check the CSR/CSC format of M
178     bool M_is_csc = (M == NULL) ? T_is_csc : M->is_csc ;
179 
180     //--------------------------------------------------------------------------
181     // transpose M if needed
182     //--------------------------------------------------------------------------
183 
184     GrB_Matrix M1 = M ;
185     bool M_transpose = (T_is_csc != M_is_csc) ;
186     if (M_transpose)
187     {
188         // MT = M'
189         // transpose: no typecast, no op, not in-place
190         GBURBLE ("(M transpose) ") ;
191         MT = GB_clear_static_header (&MT_header) ;
192         GB_OK (GB_transpose (&MT, GrB_BOOL, T_is_csc, M,    // MT static
193             NULL, NULL, NULL, false, Context)) ;
194         M1 = MT ;
195     }
196 
197     //--------------------------------------------------------------------------
198     // transpose A if needed
199     //--------------------------------------------------------------------------
200 
201     GrB_Matrix A1 = A ;
202     if (A_transpose)
203     {
204         // AT = A'
205         // transpose: no typecast, no op, not in-place
206         GBURBLE ("(A transpose) ") ;
207         GB_OK (GB_transpose (&AT, NULL, T_is_csc, A,        // AT static
208             NULL, NULL, NULL, false, Context)) ;
209         A1 = AT ;
210         ASSERT_MATRIX_OK (AT, "AT from transpose", GB0) ;
211     }
212 
213     //--------------------------------------------------------------------------
214     // transpose B if needed
215     //--------------------------------------------------------------------------
216 
217     GrB_Matrix B1 = B ;
218     if (B_transpose)
219     {
220         // BT = B'
221         // transpose: no typecast, no op, not in-place
222         GBURBLE ("(B transpose) ") ;
223         GB_OK (GB_transpose (&BT, NULL, T_is_csc, B,        // BT static
224             NULL, NULL, NULL, false, Context)) ;
225         B1 = BT ;
226     }
227 
228     //--------------------------------------------------------------------------
229     // special cases
230     //--------------------------------------------------------------------------
231 
232     // FUTURE::: handle more special cases:
233     // C<M>+=A+B when C and A are dense, B is sparse.  M can be sparse.
234     // C<M>+=A+B when C and B are dense, A is sparse.  M can be sparse.
235     // C<M>+=A+B when C, A, and B are dense.  M can be sparse.
236     // In all cases above, C remains dense and can be updated in-place
237     // C_replace must be false.  M can be valued or structural.
238 
239     #ifndef GBCOMPACT
240 
241     bool C_is_dense = GB_is_dense (C) && !GB_PENDING_OR_ZOMBIES (C) ;
242     bool A_is_dense = GB_is_dense (A1) ;
243     bool B_is_dense = GB_is_dense (B1) ;
244 
245     bool no_typecast =
246         (op->ztype == C->type)              // no typecasting of C
247         && (op->xtype == A1->type)          // no typecasting of A
248         && (op->ytype == B1->type) ;        // no typecasting of B
249 
250     bool any_bitmap =
251         GB_IS_BITMAP (C) ||
252         GB_IS_BITMAP (M) ||
253         GB_IS_BITMAP (A) ||
254         GB_IS_BITMAP (B) ;
255 
256     bool any_pending_work =
257         GB_ANY_PENDING_WORK (M1) ||
258         GB_ANY_PENDING_WORK (A1) ||
259         GB_ANY_PENDING_WORK (B1) ;
260 
261         // FUTURE: for sssp12:
262         // C<A> = A+B where C is sparse and B is dense;
263         // mask is structural, not complemented, C_replace is false.
264         // C is not empty.  Use a kernel that computes T<A>=A+B
265         // where T starts out empty; just iterate over the entries in A.
266 
267     if (A_is_dense                          // A and B are dense
268         && B_is_dense
269         && (M == NULL) && !Mask_comp        // no mask
270         && (C->is_csc == T_is_csc)          // no transpose of C
271         && no_typecast                      // no typecasting
272         && (opcode < GB_USER_opcode)        // not a user-defined operator
273         && (!op_is_positional)              // op is not positional
274         && !any_bitmap
275         && !any_pending_work)               // no matrix has pending work
276     {
277 
278         if (C_is_dense                      // C is dense
279         && accum == op                      // accum is same as the op
280         && (opcode >= GB_MIN_opcode)        // subset of binary operators
281         && (opcode <= GB_RDIV_opcode))
282         {
283 
284             //------------------------------------------------------------------
285             // C += A+B where all 3 matrices are dense
286             //------------------------------------------------------------------
287 
288             // C_replace is ignored
289             GBURBLE ("dense C+=A+B ") ;
290             GB_dense_ewise3_accum (C, A1, B1, op, Context) ;    // cannot fail
291             GB_FREE_ALL ;
292             ASSERT_MATRIX_OK (C, "C output for GB_ewise, dense C+=A+B", GB0) ;
293             return (GrB_SUCCESS) ;
294 
295         }
296         else if (accum == NULL)             // no accum
297         {
298 
299             //------------------------------------------------------------------
300             // C = A+B where A and B are dense (C is anything)
301             //------------------------------------------------------------------
302 
303             // C_replace is ignored
304             GBURBLE ("dense C=A+B ") ;
305             info = GB_dense_ewise3_noaccum (C, C_is_dense, A1, B1, op, Context);
306             GB_FREE_ALL ;
307             if (info == GrB_SUCCESS)
308             {
309                 ASSERT_MATRIX_OK (C, "C output for GB_ewise, dense C=A+B", GB0);
310             }
311             return (info) ;
312         }
313     }
314 
315     #endif
316 
317     //--------------------------------------------------------------------------
318     // T = A+B or A.*B, or with any mask M
319     //--------------------------------------------------------------------------
320 
321     bool mask_applied = false ;
322 
323     if (eWiseAdd)
324     {
325 
326         //----------------------------------------------------------------------
327         // T<any mask> = A+B
328         //----------------------------------------------------------------------
329 
330         // TODO: check the mask condition in GB_add_sparsity.
331         // Only exploit the mask in GB_add if it's more efficient than
332         // exploiting it later, probably this condition:
333 
334             // (accum == NULL) && (C->is_csc == T->is_csc)
335             // && (C_replace || GB_NNZ_UPPER_BOUND (C) == 0))
336 
337         // If that is true and the mask is applied, then T is transplanted as
338         // the final C and the mask is no longer needed.  In this case, it
339         // could be faster to exploit the mask duing GB_add.
340 
341         GB_OK (GB_add (T, T_type, T_is_csc, M1, Mask_struct, Mask_comp,
342             &mask_applied, A1, B1, op, Context)) ;
343 
344     }
345     else
346     {
347 
348         //----------------------------------------------------------------------
349         // T<any mask> = A.*B
350         //----------------------------------------------------------------------
351 
352         // T can be returned with shallow components derived from its inputs A1
353         // and/or B1.  In particular, if T is hypersparse, T->h may be a
354         // shallow copy of A1->h, B1->h, or M1->h.  T is hypersparse if any
355         // matrix A1, B1, or M1 are hypersparse.  Internally, T->h always
356         // starts as a shallow copy of A1->h, B1->h, or M1->h, but it may be
357         // pruned by GB_hypermatrix_prune, and thus no longer shallow.
358 
359         GB_OK (GB_emult (T, T_type, T_is_csc, M1, Mask_struct, Mask_comp,
360             &mask_applied, A1, B1, op, Context)) ;
361 
362         //----------------------------------------------------------------------
363         // transplant shallow content from AT, BT, or MT
364         //----------------------------------------------------------------------
365 
366         // If T is hypersparse, T->h is always a shallow copy of A1->h, B1->h,
367         // or M1->h.  Any of the three matrices A1, B1, or M1 may be temporary
368         // transposes, AT, BT, and MT respectively.  If T->h is a shallow cpoy
369         // of a temporary matrix, then flip the ownership of the T->h array,
370         // from the temporary matrix into T, so that T->h is not freed when AT,
371         // BT, and MT are freed.
372 
373         // GB_tranpose can return all kinds of shallow components, particularly
374         // when transposing vectors.  It can return AT->h as shallow copy of
375         // A->i, for example.
376 
377         if (T->h_shallow)
378         {
379             // T->h is shallow and T is hypersparse
380             ASSERT (GB_IS_HYPERSPARSE (T)) ;
381 
382             // one of A1, B1, or M1 is hypersparse
383             ASSERT (GB_IS_HYPERSPARSE (A1) || GB_IS_HYPERSPARSE (B1) ||
384                     GB_IS_HYPERSPARSE (M1))
385             if (A_transpose && T->h == A1->h)
386             {
387                 // A1 is the temporary matrix AT.  AT->h might itself be a
388                 // shallow copy of A->h or A->i, from GB_transpose.
389                 ASSERT (A1 == AT) ;
390                 T->h_shallow = AT->h_shallow ;
391                 AT->h_shallow = true ;
392             }
393             else if (B_transpose && T->h == B1->h)
394             {
395                 // B1 is the temporary matrix BT.  BT->h might itself be a
396                 // shallow copy of B->h or B->i, from GB_transpose.
397                 ASSERT (B1 == BT) ;
398                 T->h_shallow = BT->h_shallow ;
399                 BT->h_shallow = true ;
400             }
401             else if (M_transpose && T->h == M1->h)
402             {
403                 // M1 is the temporary matrix MT.  MT->h might itself be a
404                 // shallow copy of M->h or M->i, from GB_transpose.
405                 ASSERT (M1 == MT) ;
406                 T->h_shallow = MT->h_shallow ;
407                 MT->h_shallow = true ;
408             }
409 
410             // T->h may still be shallow, but if so, it is a shallow copy of
411             // some component of the user input matrices A, B, or M, and must
412             // remain shallow.  A deep copy of it will be made when T->h is
413             // transplanted into the result C.
414             ASSERT (GB_IMPLIES (T->h_shallow,
415                 (T->h == A1->h || T->h == B1->h ||
416                  (M1 != NULL && T->h == M1->h)))) ;
417         }
418     }
419 
420     //--------------------------------------------------------------------------
421     // free the transposed matrices
422     //--------------------------------------------------------------------------
423 
424     GB_phbix_free (AT) ;
425     GB_phbix_free (BT) ;
426 
427     //--------------------------------------------------------------------------
428     // C<M> = accum (C,T): accumulate the results into C via the mask
429     //--------------------------------------------------------------------------
430 
431     ASSERT_MATRIX_OK (T, "T from GB_ewise, prior to C<M>=accum(C,T)", GB0) ;
432 
433     if ((accum == NULL) && (C->is_csc == T->is_csc)
434         && (M == NULL || (M != NULL && mask_applied))
435         && (C_replace || GB_NNZ_UPPER_BOUND (C) == 0))
436     {
437         // C = 0 ; C = (ctype) T ; with the same CSR/CSC format.  The mask M
438         // (if any) has already been applied.  If C is also empty, or to be
439         // cleared anyway, and if accum is not present, then T can be
440         // transplanted directly into C, as C = (ctype) T, typecasting if
441         // needed.  If no typecasting is done then this takes no time at all
442         // and is a pure transplant.  Also conform C to its desired
443         // hypersparsity.
444         GB_phbix_free (MT) ;
445         GB_OK (GB_transplant_conform (C, C->type, &T, Context)) ;
446         return (GB_block (C, Context)) ;
447     }
448     else
449     {
450         // C<M> = accum (C,T)
451         // GB_accum_mask also conforms C to its desired hypersparsity
452         info = GB_accum_mask (C, M, MT, accum, &T, C_replace, Mask_comp,
453             Mask_struct, Context) ;
454         GB_phbix_free (MT) ;
455         return (info) ;
456     }
457 }
458 
459