1 //------------------------------------------------------------------------------
2 // GB_add: C = A+B, C<M>=A+B, and C<!M>=A+B
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // GB_add computes C=A+B, C<M>=A+B, or C<!M>=A+B using the given operator
11 // element-wise on the matrices A and B. The result is typecasted as needed.
12 // The pattern of C is the union of the pattern of A and B, intersection with
13 // the mask M, if present.
14
15 // Let the op be z=f(x,y) where x, y, and z have type xtype, ytype, and ztype.
16 // If both A(i,j) and B(i,j) are present, then:
17
18 // C(i,j) = (ctype) op ((xtype) A(i,j), (ytype) B(i,j))
19
20 // If just A(i,j) is present but not B(i,j), then:
21
22 // C(i,j) = (ctype) A (i,j)
23
24 // If just B(i,j) is present but not A(i,j), then:
25
26 // C(i,j) = (ctype) B (i,j)
27
28 // ctype is the type of matrix C. The pattern of C is the union of A and B.
29
30 // op may be NULL. In this case, the intersection of A and B must be empty.
31 // This is used by GB_Matrix_wait only, for merging the pending tuple matrix T
32 // into A. In this case, the result C is always sparse or hypersparse, not
33 // bitmap or full. Any duplicate pending tuples have already been summed in T,
34 // so the intersection of T and A is always empty.
35
36 // Some methods should not exploit the mask, but leave it for later.
37 // See GB_ewise and GB_accum_mask: the only places where this function is
38 // called with a non-null mask M. Both of those callers can handle the
39 // mask being applied later. GB_add_sparsity determines whether or not the
40 // mask should be applied now, or later.
41
42 #include "GB_add.h"
43
44 #define GB_FREE_ALL ;
45
46 GB_PUBLIC // accessed by the MATLAB tests in GraphBLAS/Test only
GB_add(GrB_Matrix C,const GrB_Type ctype,const bool C_is_csc,const GrB_Matrix M,const bool Mask_struct,const bool Mask_comp,bool * mask_applied,const GrB_Matrix A,const GrB_Matrix B,const GrB_BinaryOp op,GB_Context Context)47 GrB_Info GB_add // C=A+B, C<M>=A+B, or C<!M>=A+B
48 (
49 GrB_Matrix C, // output matrix, static header
50 const GrB_Type ctype, // type of output matrix C
51 const bool C_is_csc, // format of output matrix C
52 const GrB_Matrix M, // optional mask for C, unused if NULL
53 const bool Mask_struct, // if true, use the only structure of M
54 const bool Mask_comp, // if true, use !M
55 bool *mask_applied, // if true, the mask was applied
56 const GrB_Matrix A, // input A matrix
57 const GrB_Matrix B, // input B matrix
58 const GrB_BinaryOp op, // op to perform C = op (A,B)
59 GB_Context Context
60 )
61 {
62
63 //--------------------------------------------------------------------------
64 // check inputs
65 //--------------------------------------------------------------------------
66
67 GrB_Info info ;
68
69 ASSERT (C != NULL && C->static_header) ;
70
71 ASSERT (mask_applied != NULL) ;
72 (*mask_applied) = false ;
73
74 ASSERT_MATRIX_OK (A, "A for add", GB0) ;
75 ASSERT_MATRIX_OK (B, "B for add", GB0) ;
76 ASSERT_BINARYOP_OK_OR_NULL (op, "op for add", GB0) ;
77 ASSERT_MATRIX_OK_OR_NULL (M, "M for add", GB0) ;
78 ASSERT (A->vdim == B->vdim && A->vlen == B->vlen) ;
79 ASSERT (GB_IMPLIES (M != NULL, A->vdim == M->vdim && A->vlen == M->vlen)) ;
80
81 //--------------------------------------------------------------------------
82 // delete any lingering zombies and assemble any pending tuples
83 //--------------------------------------------------------------------------
84
85 // TODO: some cases can allow M, A, and/or B to be jumbled
86 GB_MATRIX_WAIT (M) ; // cannot be jumbled
87 GB_MATRIX_WAIT (A) ; // cannot be jumbled
88 GB_MATRIX_WAIT (B) ; // cannot be jumbled
89
90 //--------------------------------------------------------------------------
91 // determine the sparsity of C
92 //--------------------------------------------------------------------------
93
94 bool apply_mask ;
95 int C_sparsity = GB_add_sparsity (&apply_mask, M, Mask_comp, A, B) ;
96
97 //--------------------------------------------------------------------------
98 // initializations
99 //--------------------------------------------------------------------------
100
101 int64_t Cnvec, Cnvec_nonempty ;
102 int64_t *Cp = NULL ; size_t Cp_size = 0 ;
103 int64_t *Ch = NULL ; size_t Ch_size = 0 ;
104 int64_t *C_to_M = NULL ; size_t C_to_M_size = 0 ;
105 int64_t *C_to_A = NULL ; size_t C_to_A_size = 0 ;
106 int64_t *C_to_B = NULL ; size_t C_to_B_size = 0 ;
107 bool Ch_is_Mh ;
108 int C_ntasks = 0, C_nthreads ;
109 GB_task_struct *TaskList = NULL ; size_t TaskList_size = 0 ;
110
111 //--------------------------------------------------------------------------
112 // phase0: finalize the sparsity C and find the vectors in C
113 //--------------------------------------------------------------------------
114
115 info = GB_add_phase0 (
116 // computed by by phase0:
117 &Cnvec, &Ch, &Ch_size, &C_to_M, &C_to_M_size, &C_to_A, &C_to_A_size,
118 &C_to_B, &C_to_B_size, &Ch_is_Mh,
119 // input/output to phase0:
120 &C_sparsity,
121 // original input:
122 (apply_mask) ? M : NULL, A, B, Context) ;
123 if (info != GrB_SUCCESS)
124 {
125 // out of memory
126 return (info) ;
127 }
128
129 GBURBLE ("add:(%s<%s>=%s+%s) ",
130 GB_sparsity_char (C_sparsity),
131 GB_sparsity_char_matrix (M),
132 GB_sparsity_char_matrix (A),
133 GB_sparsity_char_matrix (B)) ;
134
135 //--------------------------------------------------------------------------
136 // phase1: split C into tasks, and count entries in each vector of C
137 //--------------------------------------------------------------------------
138
139 if (C_sparsity == GxB_SPARSE || C_sparsity == GxB_HYPERSPARSE)
140 {
141
142 //----------------------------------------------------------------------
143 // C is sparse or hypersparse: slice and analyze the C matrix
144 //----------------------------------------------------------------------
145
146 // phase1a: split C into tasks
147 info = GB_ewise_slice (
148 // computed by phase1a
149 &TaskList, &TaskList_size, &C_ntasks, &C_nthreads,
150 // computed by phase0:
151 Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
152 // original input:
153 (apply_mask) ? M : NULL, A, B, Context) ;
154 if (info != GrB_SUCCESS)
155 {
156 // out of memory; free everything allocated by GB_add_phase0
157 GB_FREE (&Ch, Ch_size) ;
158 GB_FREE_WERK (&C_to_M, C_to_M_size) ;
159 GB_FREE_WERK (&C_to_A, C_to_A_size) ;
160 GB_FREE_WERK (&C_to_B, C_to_B_size) ;
161 return (info) ;
162 }
163
164 // count the number of entries in each vector of C
165 info = GB_add_phase1 (
166 // computed or used by phase1:
167 &Cp, &Cp_size, &Cnvec_nonempty, op == NULL,
168 // from phase1a:
169 TaskList, C_ntasks, C_nthreads,
170 // from phase0:
171 Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
172 // original input:
173 (apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B, Context) ;
174 if (info != GrB_SUCCESS)
175 {
176 // out of memory; free everything allocated by GB_add_phase0
177 GB_FREE_WERK (&TaskList, TaskList_size) ;
178 GB_FREE (&Ch, Ch_size) ;
179 GB_FREE_WERK (&C_to_M, C_to_M_size) ;
180 GB_FREE_WERK (&C_to_A, C_to_A_size) ;
181 GB_FREE_WERK (&C_to_B, C_to_B_size) ;
182 return (info) ;
183 }
184
185 }
186 else
187 {
188
189 //----------------------------------------------------------------------
190 // C is bitmap or full: only determine how many threads to use
191 //----------------------------------------------------------------------
192
193 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
194 C_nthreads = GB_nthreads (A->vlen * A->vdim, chunk, nthreads_max) ;
195 }
196
197 //--------------------------------------------------------------------------
198 // phase2: compute the entries (indices and values) in each vector of C
199 //--------------------------------------------------------------------------
200
201 // Cp and Ch are either freed by phase2, or transplanted into C.
202 // Either way, they are not freed here.
203
204 info = GB_add_phase2 (
205 // computed or used by phase2:
206 C, ctype, C_is_csc, op,
207 // from phase1
208 &Cp, Cp_size, Cnvec_nonempty,
209 // from phase1a:
210 TaskList, C_ntasks, C_nthreads,
211 // from phase0:
212 Cnvec, &Ch, Ch_size, C_to_M, C_to_A, C_to_B, Ch_is_Mh, C_sparsity,
213 // original input:
214 (apply_mask) ? M : NULL, Mask_struct, Mask_comp, A, B, Context) ;
215
216 // Ch and Cp must not be freed; they are now C->h and C->p.
217 // If the method failed, Cp and Ch have already been freed.
218
219 // free workspace
220 GB_FREE_WERK (&TaskList, TaskList_size) ;
221 GB_FREE_WERK (&C_to_M, C_to_M_size) ;
222 GB_FREE_WERK (&C_to_A, C_to_A_size) ;
223 GB_FREE_WERK (&C_to_B, C_to_B_size) ;
224
225 if (info != GrB_SUCCESS)
226 {
227 // out of memory
228 return (info) ;
229 }
230
231 //--------------------------------------------------------------------------
232 // return result
233 //--------------------------------------------------------------------------
234
235 ASSERT_MATRIX_OK (C, "C output for add", GB0) ;
236 (*mask_applied) = apply_mask ;
237 return (GrB_SUCCESS) ;
238 }
239
240