//------------------------------------------------------------------------------
// GB_emult_03: C<M>= A.*B, M sparse/hyper, A and B bitmap/full
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// C<M>= A.*B, M sparse/hyper, A and B bitmap/full.  C has the same sparsity
// structure as M, and its pattern is a subset of M.

//      ------------------------------------------
//      C       <M>=        A       .*      B
//      ------------------------------------------
//      sparse  sparse      bitmap          bitmap  (method: 03)
//      sparse  sparse      bitmap          full    (method: 03)
//      sparse  sparse      full            bitmap  (method: 03)
//      sparse  sparse      full            full    (method: 03)

// TODO: this function can also do eWiseAdd, just as easily.
// Just change the "&&" to "||" in the GB_emult_03_template.
// If A and B are both full, eadd and emult are identical.
24
#include "GB_ewise.h"
#include "GB_emult.h"
#include "GB_binop.h"
#include "GB_unused.h"
#ifndef GBCOMPACT
#include "GB_binop__include.h"
#endif
32
// free just the workspace: the Wfirst/Wlast/Cp_kfirst arrays and the slicing
// of the mask matrix M
#define GB_FREE_WORK                        \
{                                           \
    GB_WERK_POP (Work, int64_t) ;           \
    GB_WERK_POP (M_ek_slicing, int64_t) ;   \
}

// free all workspace and the output matrix C; used only on error paths, so a
// partially-constructed C is never returned to the caller
#define GB_FREE_ALL                         \
{                                           \
    GB_FREE_WORK ;                          \
    GB_phbix_free (C) ;                     \
}
44
GB_emult_03(GrB_Matrix C,const GrB_Type ctype,const bool C_is_csc,const GrB_Matrix M,const bool Mask_struct,bool * mask_applied,const GrB_Matrix A,const GrB_Matrix B,const GrB_BinaryOp op,GB_Context Context)45 GrB_Info GB_emult_03 // C<M>=A.*B, M sparse/hyper, A and B bitmap/full
46 (
47 GrB_Matrix C, // output matrix, static header
48 const GrB_Type ctype, // type of output matrix C
49 const bool C_is_csc, // format of output matrix C
50 const GrB_Matrix M, // sparse/hyper, not NULL
51 const bool Mask_struct, // if true, use the only structure of M
52 bool *mask_applied, // if true, the mask was applied
53 const GrB_Matrix A, // input A matrix (bitmap/full)
54 const GrB_Matrix B, // input B matrix (bitmap/full)
55 const GrB_BinaryOp op, // op to perform C = op (A,B)
56 GB_Context Context
57 )
58 {
59
60 //--------------------------------------------------------------------------
61 // check inputs
62 //--------------------------------------------------------------------------
63
64 GrB_Info info ;
65 ASSERT (C != NULL && C->static_header) ;
66
67 ASSERT_MATRIX_OK (M, "M for emult_03", GB0) ;
68 ASSERT_MATRIX_OK (A, "A for emult_03", GB0) ;
69 ASSERT_MATRIX_OK (B, "B for emult_03", GB0) ;
70 ASSERT_BINARYOP_OK (op, "op for emult_03", GB0) ;
71
72 ASSERT (GB_IS_SPARSE (M) || GB_IS_HYPERSPARSE (M)) ;
73 ASSERT (!GB_PENDING (M)) ;
74 ASSERT (GB_JUMBLED_OK (M)) ;
75 ASSERT (!GB_ZOMBIES (M)) ;
76 ASSERT (GB_IS_BITMAP (A) || GB_IS_FULL (A) || GB_as_if_full (A)) ;
77 ASSERT (GB_IS_BITMAP (B) || GB_IS_FULL (B) || GB_as_if_full (B)) ;
78
79 int C_sparsity = GB_sparsity (M) ;
80
81 GBURBLE ("emult_03:(%s<%s>=%s.*%s) ",
82 GB_sparsity_char (C_sparsity),
83 GB_sparsity_char_matrix (M),
84 GB_sparsity_char_matrix (A),
85 GB_sparsity_char_matrix (B)) ;
86
87 //--------------------------------------------------------------------------
88 // declare workspace
89 //--------------------------------------------------------------------------
90
91 GB_WERK_DECLARE (Work, int64_t) ;
92 int64_t *restrict Wfirst = NULL ;
93 int64_t *restrict Wlast = NULL ;
94 int64_t *restrict Cp_kfirst = NULL ;
95 GB_WERK_DECLARE (M_ek_slicing, int64_t) ;
96
97 //--------------------------------------------------------------------------
98 // get M, A, and B
99 //--------------------------------------------------------------------------
100
101 const int64_t *restrict Mp = M->p ;
102 const int64_t *restrict Mh = M->h ;
103 const int64_t *restrict Mi = M->i ;
104 const GB_void *restrict Mx = (Mask_struct) ? NULL : (GB_void *) M->x ;
105 const int64_t vlen = M->vlen ;
106 const int64_t vdim = M->vdim ;
107 const int64_t nvec = M->nvec ;
108 const int64_t mnz = GB_NNZ (M) ;
109 const size_t msize = M->type->size ;
110
111 const int8_t *restrict Ab = A->b ;
112 const int8_t *restrict Bb = B->b ;
113
114 //--------------------------------------------------------------------------
115 // allocate C->p and C->h
116 //--------------------------------------------------------------------------
117
118 GB_OK (GB_new (&C, true, // sparse or hyper (same as M), static header
119 ctype, vlen, vdim, GB_Ap_calloc, C_is_csc,
120 C_sparsity, M->hyper_switch, nvec, Context)) ;
121 int64_t *restrict Cp = C->p ;
122
123 //--------------------------------------------------------------------------
124 // slice the mask matrix M
125 //--------------------------------------------------------------------------
126
127 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
128 int M_ntasks, M_nthreads ;
129 GB_SLICE_MATRIX (M, 8, chunk) ;
130
131 //--------------------------------------------------------------------------
132 // allocate workspace
133 //--------------------------------------------------------------------------
134
135 GB_WERK_PUSH (Work, 3*M_ntasks, int64_t) ;
136 if (Work == NULL)
137 {
138 // out of memory
139 GB_FREE_ALL ;
140 return (GrB_OUT_OF_MEMORY) ;
141 }
142 Wfirst = Work ;
143 Wlast = Work + M_ntasks ;
144 Cp_kfirst = Work + M_ntasks * 2 ;
145
146 //--------------------------------------------------------------------------
147 // count entries in C
148 //--------------------------------------------------------------------------
149
150 // This phase is very similar to GB_select_phase1 (GB_ENTRY_SELECTOR).
151
152 // TODO: if M is structural and A and B are both full, then C has exactly
153 // the same pattern as M, the first phase can be skipped.
154
155 int tid ;
156 #pragma omp parallel for num_threads(M_nthreads) schedule(dynamic,1)
157 for (tid = 0 ; tid < M_ntasks ; tid++)
158 {
159 int64_t kfirst = kfirst_Mslice [tid] ;
160 int64_t klast = klast_Mslice [tid] ;
161 Wfirst [tid] = 0 ;
162 Wlast [tid] = 0 ;
163 for (int64_t k = kfirst ; k <= klast ; k++)
164 {
165 // count the entries in C(:,j)
166 int64_t j = GBH (Mh, k) ;
167 int64_t pstart = j * vlen ; // start of A(:,j) and B(:,j)
168 int64_t pM, pM_end ;
169 GB_get_pA (&pM, &pM_end, tid, k,
170 kfirst, klast, pstart_Mslice, Mp, vlen) ;
171 int64_t cjnz = 0 ;
172 for ( ; pM < pM_end ; pM++)
173 {
174 bool mij = GB_mcast (Mx, pM, msize) ;
175 if (mij)
176 {
177 int64_t i = Mi [pM] ;
178 cjnz +=
179 (GBB (Ab, pstart + i)
180 && // TODO: for GB_add, use || instead
181 GBB (Bb, pstart + i)) ;
182 }
183 }
184 if (k == kfirst)
185 {
186 Wfirst [tid] = cjnz ;
187 }
188 else if (k == klast)
189 {
190 Wlast [tid] = cjnz ;
191 }
192 else
193 {
194 Cp [k] = cjnz ;
195 }
196 }
197 }
198
199 //--------------------------------------------------------------------------
200 // finalize Cp, cumulative sum of Cp and compute Cp_kfirst
201 //--------------------------------------------------------------------------
202
203 GB_ek_slice_merge1 (Cp, Wfirst, Wlast, M_ek_slicing, M_ntasks) ;
204 GB_ek_slice_merge2 (&(C->nvec_nonempty), Cp_kfirst, Cp, nvec,
205 Wfirst, Wlast, M_ek_slicing, M_ntasks, M_nthreads, Context) ;
206
207 //--------------------------------------------------------------------------
208 // allocate C->i and C->x
209 //--------------------------------------------------------------------------
210
211 int64_t cnz = Cp [nvec] ;
212 GB_OK (GB_bix_alloc (C, cnz, false, false, true, true, Context)) ;
213
214 //--------------------------------------------------------------------------
215 // copy pattern into C
216 //--------------------------------------------------------------------------
217
218 // TODO: could make these components of C shallow instead
219
220 if (GB_IS_HYPERSPARSE (M))
221 {
222 // copy M->h into C->h
223 GB_memcpy (C->h, Mh, nvec * sizeof (int64_t), M_nthreads) ;
224 }
225
226 C->nvec = nvec ;
227 C->jumbled = M->jumbled ;
228 C->magic = GB_MAGIC ;
229
230 //--------------------------------------------------------------------------
231 // get the opcode
232 //--------------------------------------------------------------------------
233
234 GB_Opcode opcode = op->opcode ;
235 bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
236 bool op_is_first = (opcode == GB_FIRST_opcode) ;
237 bool op_is_second = (opcode == GB_SECOND_opcode) ;
238 bool op_is_pair = (opcode == GB_PAIR_opcode) ;
239 GB_Type_code ccode = ctype->code ;
240
241 //--------------------------------------------------------------------------
242 // check if the values of A and/or B are ignored
243 //--------------------------------------------------------------------------
244
245 // With C = ewisemult (A,B), only the intersection of A and B is used.
246 // If op is SECOND or PAIR, the values of A are never accessed.
247 // If op is FIRST or PAIR, the values of B are never accessed.
248 // If op is PAIR, the values of A and B are never accessed.
249 // Contrast with ewiseadd.
250
251 // A is passed as x, and B as y, in z = op(x,y)
252 bool A_is_pattern = op_is_second || op_is_pair || op_is_positional ;
253 bool B_is_pattern = op_is_first || op_is_pair || op_is_positional ;
254
255 //--------------------------------------------------------------------------
256 // using a built-in binary operator (except for positional operators)
257 //--------------------------------------------------------------------------
258
259 bool done = false ;
260
261 #ifndef GBCOMPACT
262
263 //----------------------------------------------------------------------
264 // define the worker for the switch factory
265 //----------------------------------------------------------------------
266
267 #define GB_AemultB_03(mult,xname) GB (_AemultB_03_ ## mult ## xname)
268
269 #define GB_BINOP_WORKER(mult,xname) \
270 { \
271 info = GB_AemultB_03(mult,xname) (C, M, Mask_struct, A, B, \
272 Cp_kfirst, M_ek_slicing, M_ntasks, M_nthreads) ; \
273 done = (info != GrB_NO_VALUE) ; \
274 } \
275 break ;
276
277 //----------------------------------------------------------------------
278 // launch the switch factory
279 //----------------------------------------------------------------------
280
281 GB_Type_code xcode, ycode, zcode ;
282 if (!op_is_positional &&
283 GB_binop_builtin (A->type, A_is_pattern, B->type, B_is_pattern,
284 op, false, &opcode, &xcode, &ycode, &zcode) && ccode == zcode)
285 {
286 #include "GB_binop_factory.c"
287 }
288
289 #endif
290
291 //--------------------------------------------------------------------------
292 // generic worker
293 //--------------------------------------------------------------------------
294
295 if (!done)
296 {
297 GB_BURBLE_MATRIX (C, "(generic emult_03: %s) ", op->name) ;
298 GB_ewise_generic (C, op, NULL, 0, 0,
299 NULL, NULL, NULL, C_sparsity, GB_EMULT_METHOD_03, Cp_kfirst,
300 M_ek_slicing, M_ntasks, M_nthreads, NULL, 0, 0, NULL, 0, 0,
301 M, Mask_struct, false, A, B, Context) ;
302 }
303
304 //--------------------------------------------------------------------------
305 // remove empty vectors from C, if hypersparse
306 //--------------------------------------------------------------------------
307
308 GB_OK (GB_hypermatrix_prune (C, Context)) ;
309
310 //--------------------------------------------------------------------------
311 // free workspace and return result
312 //--------------------------------------------------------------------------
313
314 GB_FREE_WORK ;
315 ASSERT_MATRIX_OK (C, "C output for emult_03", GB0) ;
316 (*mask_applied) = true ;
317 return (GrB_SUCCESS) ;
318 }
319
320