1 //------------------------------------------------------------------------------
2 // GB_add: C = A+B, C<M>=A+B, and C<!M>=A+B
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // GB_add computes C=A+B, C<M>=A+B, or C<!M>=A+B using the given operator
11 // element-wise on the matrices A and B.  The result is typecasted as needed.
12 // The pattern of C is the union of the pattern of A and B, intersection with
13 // the mask M, if present.
14 
15 // Let the op be z=f(x,y) where x, y, and z have type xtype, ytype, and ztype.
16 // If both A(i,j) and B(i,j) are present, then:
17 
18 //      C(i,j) = (ctype) op ((xtype) A(i,j), (ytype) B(i,j))
19 
20 // If just A(i,j) is present but not B(i,j), then:
21 
22 //      C(i,j) = (ctype) A (i,j)
23 
24 // If just B(i,j) is present but not A(i,j), then:
25 
26 //      C(i,j) = (ctype) B (i,j)
27 
28 // ctype is the type of matrix C.  The pattern of C is the union of A and B.
29 
30 // op may be NULL.  In this case, the intersection of A and B must be empty.
31 // This is used by GB_Matrix_wait only, for merging the pending tuple matrix T
32 // into A.  In this case, the result C is always sparse or hypersparse, not
33 // bitmap or full.  Any duplicate pending tuples have already been summed in T,
34 // so the intersection of T and A is always empty.
35 
36 // Some methods should not exploit the mask, but leave it for later.
37 // See GB_ewise and GB_accum_mask: the only places where this function is
38 // called with a non-null mask M.  Both of those callers can handle the
39 // mask being applied later.  GB_add_sparsity determines whether or not the
40 // mask should be applied now, or later.
41 
42 #include "GB_add.h"
43 
44 #define GB_FREE_ALL ;
45 
46 GB_PUBLIC   // accessed by the MATLAB tests in GraphBLAS/Test only
GB_add(GrB_Matrix C,const GrB_Type ctype,const bool C_is_csc,const GrB_Matrix M,const bool Mask_struct,const bool Mask_comp,bool * mask_applied,const GrB_Matrix A,const GrB_Matrix B,const GrB_BinaryOp op,GB_Context Context)47 GrB_Info GB_add             // C=A+B, C<M>=A+B, or C<!M>=A+B
48 (
49     GrB_Matrix C,           // output matrix, static header
50     const GrB_Type ctype,   // type of output matrix C
51     const bool C_is_csc,    // format of output matrix C
52     const GrB_Matrix M,     // optional mask for C, unused if NULL
53     const bool Mask_struct, // if true, use the only structure of M
54     const bool Mask_comp,   // if true, use !M
55     bool *mask_applied,     // if true, the mask was applied
56     const GrB_Matrix A,     // input A matrix
57     const GrB_Matrix B,     // input B matrix
58     const GrB_BinaryOp op,  // op to perform C = op (A,B)
59     GB_Context Context
60 )
61 {
62 
63     //--------------------------------------------------------------------------
64     // check inputs
65     //--------------------------------------------------------------------------
66 
67     GrB_Info info ;
68 
69     ASSERT (C != NULL && C->static_header) ;
70 
71     ASSERT (mask_applied != NULL) ;
72     (*mask_applied) = false ;
73 
74     ASSERT_MATRIX_OK (A, "A for add", GB0) ;
75     ASSERT_MATRIX_OK (B, "B for add", GB0) ;
76     ASSERT_BINARYOP_OK_OR_NULL (op, "op for add", GB0) ;
77     ASSERT_MATRIX_OK_OR_NULL (M, "M for add", GB0) ;
78     ASSERT (A->vdim == B->vdim && A->vlen == B->vlen) ;
79     ASSERT (GB_IMPLIES (M != NULL, A->vdim == M->vdim && A->vlen == M->vlen)) ;
80 
81     //--------------------------------------------------------------------------
82     // delete any lingering zombies and assemble any pending tuples
83     //--------------------------------------------------------------------------
84 
85     // TODO: some cases can allow M, A, and/or B to be jumbled
86     GB_MATRIX_WAIT (M) ;        // cannot be jumbled
87     GB_MATRIX_WAIT (A) ;        // cannot be jumbled
88     GB_MATRIX_WAIT (B) ;        // cannot be jumbled
89 
90     //--------------------------------------------------------------------------
91     // determine the sparsity of C
92     //--------------------------------------------------------------------------
93 
94     bool apply_mask ;
95     int C_sparsity = GB_add_sparsity (&apply_mask, M, Mask_comp, A, B) ;
96 
97     //--------------------------------------------------------------------------
98     // initializations
99     //--------------------------------------------------------------------------
100 
101     int64_t Cnvec, Cnvec_nonempty ;
102     int64_t *Cp     = NULL ; size_t Cp_size = 0 ;
103     int64_t *Ch     = NULL ; size_t Ch_size = 0 ;
104     int64_t *C_to_M = NULL ; size_t C_to_M_size = 0 ;
105     int64_t *C_to_A = NULL ; size_t C_to_A_size = 0 ;
106     int64_t *C_to_B = NULL ; size_t C_to_B_size = 0 ;
107     bool Ch_is_Mh ;
108     int C_ntasks = 0, C_nthreads ;
109     GB_task_struct *TaskList = NULL ; size_t TaskList_size = 0 ;
110 
111     //--------------------------------------------------------------------------
112     // phase0: finalize the sparsity C and find the vectors in C
113     //--------------------------------------------------------------------------
114 
115     info = GB_add_phase0 (
116         // computed by by phase0:
117         &Cnvec, &Ch, &Ch_size, &C_to_M, &C_to_M_size, &C_to_A, &C_to_A_size,
118         &C_to_B, &C_to_B_size, &Ch_is_Mh,
119         // input/output to phase0:
120         &C_sparsity,
121         // original input:
122         (apply_mask) ? M : NULL, A, B, Context) ;
123     if (info != GrB_SUCCESS)
124     {
125         // out of memory
126         return (info) ;
127     }
128 
129     GBURBLE ("add:(%s<%s>=%s+%s) ",
130         GB_sparsity_char (C_sparsity),
131         GB_sparsity_char_matrix (M),
132         GB_sparsity_char_matrix (A),
133         GB_sparsity_char_matrix (B)) ;
134 
135     //--------------------------------------------------------------------------
136     // phase1: split C into tasks, and count entries in each vector of C
137     //--------------------------------------------------------------------------
138 
139     if (C_sparsity == GxB_SPARSE || C_sparsity == GxB_HYPERSPARSE)
140     {
141 
142         //----------------------------------------------------------------------
143         // C is sparse or hypersparse: slice and analyze the C matrix
144         //----------------------------------------------------------------------
145 
146         // phase1a: split C into tasks
147         info = GB_ewise_slice (
148             // computed by phase1a
149             &TaskList, &TaskList_size, &C_ntasks, &C_nthreads,
150             // computed by phase0:
151             Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
152             // original input:
153             (apply_mask) ? M : NULL, A, B, Context) ;
154         if (info != GrB_SUCCESS)
155         {
156             // out of memory; free everything allocated by GB_add_phase0
157             GB_FREE (&Ch, Ch_size) ;
158             GB_FREE_WERK (&C_to_M, C_to_M_size) ;
159             GB_FREE_WERK (&C_to_A, C_to_A_size) ;
160             GB_FREE_WERK (&C_to_B, C_to_B_size) ;
161             return (info) ;
162         }
163 
164         // count the number of entries in each vector of C
165         info = GB_add_phase1 (
166             // computed or used by phase1:
167             &Cp, &Cp_size, &Cnvec_nonempty, op == NULL,
168             // from phase1a:
169             TaskList, C_ntasks, C_nthreads,
170             // from phase0:
171             Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
172             // original input:
173             (apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B, Context) ;
174         if (info != GrB_SUCCESS)
175         {
176             // out of memory; free everything allocated by GB_add_phase0
177             GB_FREE_WERK (&TaskList, TaskList_size) ;
178             GB_FREE (&Ch, Ch_size) ;
179             GB_FREE_WERK (&C_to_M, C_to_M_size) ;
180             GB_FREE_WERK (&C_to_A, C_to_A_size) ;
181             GB_FREE_WERK (&C_to_B, C_to_B_size) ;
182             return (info) ;
183         }
184 
185     }
186     else
187     {
188 
189         //----------------------------------------------------------------------
190         // C is bitmap or full: only determine how many threads to use
191         //----------------------------------------------------------------------
192 
193         GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
194         C_nthreads = GB_nthreads (A->vlen * A->vdim, chunk, nthreads_max) ;
195     }
196 
197     //--------------------------------------------------------------------------
198     // phase2: compute the entries (indices and values) in each vector of C
199     //--------------------------------------------------------------------------
200 
201     // Cp and Ch are either freed by phase2, or transplanted into C.
202     // Either way, they are not freed here.
203 
204     info = GB_add_phase2 (
205         // computed or used by phase2:
206         C, ctype, C_is_csc, op,
207         // from phase1
208         &Cp, Cp_size, Cnvec_nonempty,
209         // from phase1a:
210         TaskList, C_ntasks, C_nthreads,
211         // from phase0:
212         Cnvec, &Ch, Ch_size, C_to_M, C_to_A, C_to_B, Ch_is_Mh, C_sparsity,
213         // original input:
214         (apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B, Context) ;
215 
216     // Ch and Cp must not be freed; they are now C->h and C->p.
217     // If the method failed, Cp and Ch have already been freed.
218 
219     // free workspace
220     GB_FREE_WERK (&TaskList, TaskList_size) ;
221     GB_FREE_WERK (&C_to_M, C_to_M_size) ;
222     GB_FREE_WERK (&C_to_A, C_to_A_size) ;
223     GB_FREE_WERK (&C_to_B, C_to_B_size) ;
224 
225     if (info != GrB_SUCCESS)
226     {
227         // out of memory
228         return (info) ;
229     }
230 
231     //--------------------------------------------------------------------------
232     // return result
233     //--------------------------------------------------------------------------
234 
235     ASSERT_MATRIX_OK (C, "C output for add", GB0) ;
236     (*mask_applied) = apply_mask ;
237     return (GrB_SUCCESS) ;
238 }
239 
240