1 //------------------------------------------------------------------------------
2 // GB_emult_02: C = A.*B where A is sparse/hyper and B is bitmap/full
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // C = A.*B where A is sparse/hyper and B is bitmap/full constructs C with
11 // the same sparsity structure as A. This method can also be called with
12 // the two input matrices swapped, with flipxy true, to handle the case
13 // where A is bitmap/full and B is sparse/hyper.
14
15 // When no mask is present, or the mask is applied later, this method handles
16 // the following cases:
17
18 // ------------------------------------------
19 // C = A .* B
20 // ------------------------------------------
21 // sparse . sparse bitmap
22 // sparse . sparse full
23 // sparse . bitmap sparse
24 // sparse . full sparse
25
26 // If M is sparse/hyper and complemented, it is not passed here:
27
28 // ------------------------------------------
29 // C <!M>= A .* B
30 // ------------------------------------------
31 // sparse sparse sparse bitmap (mask later)
32 // sparse sparse sparse full (mask later)
33 // sparse sparse bitmap sparse (mask later)
34 // sparse sparse full sparse (mask later)
35
36 // If M is present, it is bitmap/full:
37
38 // ------------------------------------------
39 // C <M> = A .* B
40 // ------------------------------------------
41 // sparse bitmap sparse bitmap
42 // sparse bitmap sparse full
43 // sparse bitmap bitmap sparse
44 // sparse bitmap full sparse
45
46 // ------------------------------------------
47 // C <M> = A .* B
48 // ------------------------------------------
49 // sparse full sparse bitmap
50 // sparse full sparse full
51 // sparse full bitmap sparse
52 // sparse full full sparse
53
54 // ------------------------------------------
55 // C <!M> = A .* B
56 // ------------------------------------------
57 // sparse bitmap sparse bitmap
58 // sparse bitmap sparse full
59 // sparse bitmap bitmap sparse
60 // sparse bitmap full sparse
61
62 // ------------------------------------------
63 // C <!M> = A .* B
64 // ------------------------------------------
65 // sparse full sparse bitmap
66 // sparse full sparse full
67 // sparse full bitmap sparse
68 // sparse full full sparse
69
70 #include "GB_ewise.h"
71 #include "GB_emult.h"
72 #include "GB_binop.h"
73 #include "GB_unused.h"
74 #ifndef GBCOMPACT
75 #include "GB_binop__include.h"
76 #endif
77
// Free only the workspace (the Work array and the slicing of A).  The pops
// must be in this order: Work is pushed after A_ek_slicing, and GB_WERK_POP
// must unwind the werkspace stack in reverse order of the pushes.
#define GB_FREE_WORK                        \
{                                           \
    GB_WERK_POP (Work, int64_t) ;           \
    GB_WERK_POP (A_ek_slicing, int64_t) ;   \
}

// Free the workspace and also the output matrix C (used on error paths only;
// C has a static header so GB_phbix_free clears its contents, not the header).
#define GB_FREE_ALL                         \
{                                           \
    GB_FREE_WORK ;                          \
    GB_phbix_free (C) ;                     \
}
89
GB_emult_02(GrB_Matrix C,const GrB_Type ctype,const bool C_is_csc,const GrB_Matrix M,const bool Mask_struct,const bool Mask_comp,const GrB_Matrix A,const GrB_Matrix B,GrB_BinaryOp op,bool flipxy,GB_Context Context)90 GrB_Info GB_emult_02 // C=A.*B when A is sparse/hyper, B bitmap/full
91 (
92 GrB_Matrix C, // output matrix, static header
93 const GrB_Type ctype, // type of output matrix C
94 const bool C_is_csc, // format of output matrix C
95 const GrB_Matrix M, // optional mask, unused if NULL
96 const bool Mask_struct, // if true, use the only structure of M
97 const bool Mask_comp, // if true, use !M
98 const GrB_Matrix A, // input A matrix (sparse/hyper)
99 const GrB_Matrix B, // input B matrix (bitmap/full)
100 GrB_BinaryOp op, // op to perform C = op (A,B)
101 bool flipxy, // if true use fmult(y,x) else fmult(x,y)
102 GB_Context Context
103 )
104 {
105
106 //--------------------------------------------------------------------------
107 // check inputs
108 //--------------------------------------------------------------------------
109
110 GrB_Info info ;
111 ASSERT (C != NULL && C->static_header) ;
112
113 ASSERT_MATRIX_OK_OR_NULL (M, "M for emult_02", GB0) ;
114 ASSERT_MATRIX_OK (A, "A for emult_02", GB0) ;
115 ASSERT_MATRIX_OK (B, "B for emult_02", GB0) ;
116 ASSERT_BINARYOP_OK (op, "op for emult_02", GB0) ;
117 ASSERT_TYPE_OK (ctype, "ctype for emult_02", GB0) ;
118
119 ASSERT (GB_IS_SPARSE (A) || GB_IS_HYPERSPARSE (A)) ;
120 ASSERT (!GB_PENDING (A)) ;
121 ASSERT (GB_JUMBLED_OK (A)) ;
122 ASSERT (!GB_ZOMBIES (A)) ;
123 ASSERT (GB_IS_BITMAP (B) || GB_IS_FULL (B)) ;
124 ASSERT (M == NULL || GB_IS_BITMAP (B) || GB_IS_FULL (B)) ;
125
126 int C_sparsity = GB_sparsity (A) ;
127
128 if (M == NULL)
129 {
130 GBURBLE ("emult_02:(%s=%s.*%s)",
131 GB_sparsity_char (C_sparsity),
132 GB_sparsity_char_matrix (A),
133 GB_sparsity_char_matrix (B)) ;
134 }
135 else
136 {
137 GBURBLE ("emult_02:(%s<%s%s%s>=%s.*%s) ",
138 GB_sparsity_char (C_sparsity),
139 Mask_comp ? "!" : "",
140 GB_sparsity_char_matrix (M),
141 Mask_struct ? ",struct" : "",
142 GB_sparsity_char_matrix (A),
143 GB_sparsity_char_matrix (B)) ;
144 }
145
146 //--------------------------------------------------------------------------
147 // declare workspace
148 //--------------------------------------------------------------------------
149
150 GB_WERK_DECLARE (Work, int64_t) ;
151 int64_t *restrict Wfirst = NULL ;
152 int64_t *restrict Wlast = NULL ;
153 int64_t *restrict Cp_kfirst = NULL ;
154 GB_WERK_DECLARE (A_ek_slicing, int64_t) ;
155
156 //--------------------------------------------------------------------------
157 // get M, A, and B
158 //--------------------------------------------------------------------------
159
160 const int8_t *restrict Mb = (M == NULL) ? NULL : M->b ;
161 const GB_void *restrict Mx = (M == NULL || Mask_struct) ? NULL :
162 (const GB_void *) M->x ;
163 const size_t msize = (M == NULL) ? 0 : M->type->size ;
164
165 const int64_t *restrict Ap = A->p ;
166 const int64_t *restrict Ah = A->h ;
167 const int64_t *restrict Ai = A->i ;
168 const int64_t vlen = A->vlen ;
169 const int64_t vdim = A->vdim ;
170 const int64_t nvec = A->nvec ;
171 const int64_t anz = GB_NNZ (A) ;
172
173 const int8_t *restrict Bb = B->b ;
174 const bool B_is_bitmap = GB_IS_BITMAP (B) ;
175
176 //--------------------------------------------------------------------------
177 // allocate C->p and C->h
178 //--------------------------------------------------------------------------
179
180 GB_OK (GB_new (&C, true, // sparse or hyper (same as A), static header
181 ctype, vlen, vdim, GB_Ap_calloc, C_is_csc,
182 C_sparsity, A->hyper_switch, nvec, Context)) ;
183 int64_t *restrict Cp = C->p ;
184
185 //--------------------------------------------------------------------------
186 // slice the input matrix A
187 //--------------------------------------------------------------------------
188
189 int A_nthreads, A_ntasks ;
190 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
191 GB_SLICE_MATRIX (A, 8, chunk) ;
192
193 //--------------------------------------------------------------------------
194 // count entries in C
195 //--------------------------------------------------------------------------
196
197 C->nvec_nonempty = A->nvec_nonempty ;
198 C->nvec = nvec ;
199 const bool C_has_pattern_of_A = !B_is_bitmap && (M == NULL) ;
200
201 if (!C_has_pattern_of_A)
202 {
203
204 //----------------------------------------------------------------------
205 // allocate workspace
206 //----------------------------------------------------------------------
207
208 GB_WERK_PUSH (Work, 3*A_ntasks, int64_t) ;
209 if (Work == NULL)
210 {
211 // out of memory
212 GB_FREE_ALL ;
213 return (GrB_OUT_OF_MEMORY) ;
214 }
215 Wfirst = Work ;
216 Wlast = Work + A_ntasks ;
217 Cp_kfirst = Work + A_ntasks * 2 ;
218
219 //----------------------------------------------------------------------
220 // count entries in C
221 //----------------------------------------------------------------------
222
223 // This phase is very similar to GB_select_phase1 (GB_ENTRY_SELECTOR).
224
225 if (M == NULL)
226 {
227
228 //------------------------------------------------------------------
229 // C = A.*B where A is sparse/hyper and B is bitmap
230 //------------------------------------------------------------------
231
232 ASSERT (B_is_bitmap) ;
233
234 int tid ;
235 #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1)
236 for (tid = 0 ; tid < A_ntasks ; tid++)
237 {
238 int64_t kfirst = kfirst_Aslice [tid] ;
239 int64_t klast = klast_Aslice [tid] ;
240 Wfirst [tid] = 0 ;
241 Wlast [tid] = 0 ;
242 for (int64_t k = kfirst ; k <= klast ; k++)
243 {
244 // count the entries in C(:,j)
245 int64_t j = GBH (Ah, k) ;
246 int64_t pB_start = j * vlen ;
247 int64_t pA, pA_end ;
248 GB_get_pA (&pA, &pA_end, tid, k,
249 kfirst, klast, pstart_Aslice, Ap, vlen) ;
250 int64_t cjnz = 0 ;
251 for ( ; pA < pA_end ; pA++)
252 {
253 cjnz += Bb [pB_start + Ai [pA]] ;
254 }
255 if (k == kfirst)
256 {
257 Wfirst [tid] = cjnz ;
258 }
259 else if (k == klast)
260 {
261 Wlast [tid] = cjnz ;
262 }
263 else
264 {
265 Cp [k] = cjnz ;
266 }
267 }
268 }
269
270 }
271 else
272 {
273
274 //------------------------------------------------------------------
275 // C<#M> = A.*B where M and B are bitmap/full, A is sparse/hyper
276 //------------------------------------------------------------------
277
278 ASSERT (M != NULL) ;
279
280 int tid ;
281 #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1)
282 for (tid = 0 ; tid < A_ntasks ; tid++)
283 {
284 int64_t kfirst = kfirst_Aslice [tid] ;
285 int64_t klast = klast_Aslice [tid] ;
286 Wfirst [tid] = 0 ;
287 Wlast [tid] = 0 ;
288 for (int64_t k = kfirst ; k <= klast ; k++)
289 {
290 // count the entries in C(:,j)
291 int64_t j = GBH (Ah, k) ;
292 int64_t pB_start = j * vlen ;
293 int64_t pA, pA_end ;
294 GB_get_pA (&pA, &pA_end, tid, k,
295 kfirst, klast, pstart_Aslice, Ap, vlen) ;
296 int64_t cjnz = 0 ;
297 for ( ; pA < pA_end ; pA++)
298 {
299 int64_t i = Ai [pA] ;
300 int64_t pB = pB_start + i ;
301 bool mij = GBB (Mb, pB) && GB_mcast (Mx, pB, msize) ;
302 mij = mij ^ Mask_comp ;
303 cjnz += (mij && GBB (Bb, pB)) ;
304 }
305 if (k == kfirst)
306 {
307 Wfirst [tid] = cjnz ;
308 }
309 else if (k == klast)
310 {
311 Wlast [tid] = cjnz ;
312 }
313 else
314 {
315 Cp [k] = cjnz ;
316 }
317 }
318 }
319 }
320
321 //----------------------------------------------------------------------
322 // finalize Cp, cumulative sum of Cp and compute Cp_kfirst
323 //----------------------------------------------------------------------
324
325 GB_ek_slice_merge1 (Cp, Wfirst, Wlast, A_ek_slicing, A_ntasks) ;
326 GB_ek_slice_merge2 (&(C->nvec_nonempty), Cp_kfirst, Cp, nvec,
327 Wfirst, Wlast, A_ek_slicing, A_ntasks, A_nthreads, Context) ;
328 }
329
330 //--------------------------------------------------------------------------
331 // allocate C->i and C->x
332 //--------------------------------------------------------------------------
333
334 int64_t cnz = (C_has_pattern_of_A) ? anz : Cp [nvec] ;
335 GB_OK (GB_bix_alloc (C, cnz, false, false, true, true, Context)) ;
336
337 //--------------------------------------------------------------------------
338 // copy pattern into C
339 //--------------------------------------------------------------------------
340
341 // TODO: could make these components of C shallow instead of memcpy
342
343 if (GB_IS_HYPERSPARSE (A))
344 {
345 // copy A->h into C->h
346 GB_memcpy (C->h, Ah, nvec * sizeof (int64_t), A_nthreads) ;
347 }
348
349 if (C_has_pattern_of_A)
350 {
351 // B is full and no mask present, so the pattern of C is the same as
352 // the pattern of A
353 GB_memcpy (Cp, Ap, (nvec+1) * sizeof (int64_t), A_nthreads) ;
354 GB_memcpy (C->i, Ai, cnz * sizeof (int64_t), A_nthreads) ;
355 }
356
357 C->jumbled = A->jumbled ;
358 C->magic = GB_MAGIC ;
359
360 //--------------------------------------------------------------------------
361 // special case for the ANY operator
362 //--------------------------------------------------------------------------
363
364 // Replace the ANY operator with SECOND. ANY and SECOND give the same
365 // result if flipxy is false. However, SECOND is changed to FIRST if
366 // flipxy is true. This ensures that the results do not depend on the
367 // sparsity structures of A and B.
368
369 if (op->opcode == GB_ANY_opcode)
370 {
371 switch (op->xtype->code)
372 {
373 case GB_BOOL_code : op = GrB_SECOND_BOOL ; break ;
374 case GB_INT8_code : op = GrB_SECOND_INT8 ; break ;
375 case GB_INT16_code : op = GrB_SECOND_INT16 ; break ;
376 case GB_INT32_code : op = GrB_SECOND_INT32 ; break ;
377 case GB_INT64_code : op = GrB_SECOND_INT64 ; break ;
378 case GB_UINT8_code : op = GrB_SECOND_UINT8 ; break ;
379 case GB_UINT16_code : op = GrB_SECOND_UINT16 ; break ;
380 case GB_UINT32_code : op = GrB_SECOND_UINT32 ; break ;
381 case GB_UINT64_code : op = GrB_SECOND_UINT64 ; break ;
382 case GB_FP32_code : op = GrB_SECOND_FP32 ; break ;
383 case GB_FP64_code : op = GrB_SECOND_FP64 ; break ;
384 case GB_FC32_code : op = GxB_SECOND_FC32 ; break ;
385 case GB_FC64_code : op = GxB_SECOND_FC64 ; break ;
386 default: ;
387 }
388 }
389
390 //--------------------------------------------------------------------------
391 // handle flipxy
392 //--------------------------------------------------------------------------
393
394 if (flipxy)
395 {
396 bool handled ;
397 op = GB_flip_op (op, &handled) ;
398 if (handled) flipxy = false ;
399 }
400 ASSERT_BINARYOP_OK (op, "final op for emult_02", GB0) ;
401
402 //--------------------------------------------------------------------------
403 // get the opcode
404 //--------------------------------------------------------------------------
405
406 // if flipxy was true on input and the op is positional, FIRST, SECOND, or
407 // PAIR, the op has already been flipped, so these tests do not have to
408 // consider that case.
409
410 GB_Opcode opcode = op->opcode ;
411 bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
412 bool op_is_first = (opcode == GB_FIRST_opcode) ;
413 bool op_is_second = (opcode == GB_SECOND_opcode) ;
414 bool op_is_pair = (opcode == GB_PAIR_opcode) ;
415 GB_Type_code ccode = ctype->code ;
416
417 //--------------------------------------------------------------------------
418 // check if the values of A and/or B are ignored
419 //--------------------------------------------------------------------------
420
421 // With C = ewisemult (A,B), only the intersection of A and B is used.
422 // If op is SECOND or PAIR, the values of A are never accessed.
423 // If op is FIRST or PAIR, the values of B are never accessed.
424 // If op is PAIR, the values of A and B are never accessed.
425 // Contrast with ewiseadd.
426
427 // A is passed as x, and B as y, in z = op(x,y)
428 bool A_is_pattern = op_is_second || op_is_pair || op_is_positional ;
429 bool B_is_pattern = op_is_first || op_is_pair || op_is_positional ;
430
431 //--------------------------------------------------------------------------
432 // using a built-in binary operator (except for positional operators)
433 //--------------------------------------------------------------------------
434
435 bool done = false ;
436
437 #ifndef GBCOMPACT
438
439 //----------------------------------------------------------------------
440 // define the worker for the switch factory
441 //----------------------------------------------------------------------
442
443 #define GB_AemultB_02(mult,xname) GB (_AemultB_02_ ## mult ## xname)
444
445 #define GB_BINOP_WORKER(mult,xname) \
446 { \
447 info = GB_AemultB_02(mult,xname) (C, \
448 M, Mask_struct, Mask_comp, A, B, flipxy, \
449 Cp_kfirst, A_ek_slicing, A_ntasks, A_nthreads) ; \
450 done = (info != GrB_NO_VALUE) ; \
451 } \
452 break ;
453
454 //----------------------------------------------------------------------
455 // launch the switch factory
456 //----------------------------------------------------------------------
457
458 GB_Type_code xcode, ycode, zcode ;
459 if (!op_is_positional &&
460 GB_binop_builtin (A->type, A_is_pattern, B->type, B_is_pattern,
461 op, false, &opcode, &xcode, &ycode, &zcode) && ccode == zcode)
462 {
463 #include "GB_binop_factory.c"
464 }
465
466 #endif
467
468 //--------------------------------------------------------------------------
469 // generic worker
470 //--------------------------------------------------------------------------
471
472 if (!done)
473 {
474 GB_BURBLE_MATRIX (C, "(generic emult_02: %s) ", op->name) ;
475 int ewise_method = flipxy ? GB_EMULT_METHOD_02B : GB_EMULT_METHOD_02A ;
476 GB_ewise_generic (C, op, NULL, 0, 0,
477 NULL, NULL, NULL, C_sparsity, ewise_method, Cp_kfirst,
478 NULL, 0, 0, A_ek_slicing, A_ntasks, A_nthreads, NULL, 0, 0,
479 M, Mask_struct, Mask_comp, A, B, Context) ;
480 }
481
482 //--------------------------------------------------------------------------
483 // remove empty vectors from C, if hypersparse
484 //--------------------------------------------------------------------------
485
486 GB_OK (GB_hypermatrix_prune (C, Context)) ;
487
488 //--------------------------------------------------------------------------
489 // free workspace and return result
490 //--------------------------------------------------------------------------
491
492 GB_FREE_WORK ;
493 ASSERT_MATRIX_OK (C, "C output for emult_02", GB0) ;
494 return (GrB_SUCCESS) ;
495 }
496
497