1 //------------------------------------------------------------------------------
2 // GB_emult_02: C = A.*B where A is sparse/hyper and B is bitmap/full
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // C = A.*B where A is sparse/hyper and B is bitmap/full constructs C with
11 // the same sparsity structure as A.  This method can also be called with
12 // the two input matrices swapped, with flipxy true, to handle the case
13 // where A is bitmap/full and B is sparse/hyper.
14 
15 // When no mask is present, or the mask is applied later, this method handles
16 // the following cases:
17 
18         //      ------------------------------------------
19         //      C       =           A       .*      B
20         //      ------------------------------------------
21         //      sparse  .           sparse          bitmap
22         //      sparse  .           sparse          full
23         //      sparse  .           bitmap          sparse
24         //      sparse  .           full            sparse
25 
26 // If M is sparse/hyper and complemented, it is not passed here:
27 
28         //      ------------------------------------------
29         //      C       <!M>=       A       .*      B
30         //      ------------------------------------------
31         //      sparse  sparse      sparse          bitmap  (mask later)
32         //      sparse  sparse      sparse          full    (mask later)
33         //      sparse  sparse      bitmap          sparse  (mask later)
34         //      sparse  sparse      full            sparse  (mask later)
35 
36 // If M is present, it is bitmap/full:
37 
38         //      ------------------------------------------
39         //      C      <M> =        A       .*      B
40         //      ------------------------------------------
41         //      sparse  bitmap      sparse          bitmap
42         //      sparse  bitmap      sparse          full
43         //      sparse  bitmap      bitmap          sparse
44         //      sparse  bitmap      full            sparse
45 
46         //      ------------------------------------------
47         //      C      <M> =        A       .*      B
48         //      ------------------------------------------
49         //      sparse  full        sparse          bitmap
50         //      sparse  full        sparse          full
51         //      sparse  full        bitmap          sparse
52         //      sparse  full        full            sparse
53 
54         //      ------------------------------------------
55         //      C      <!M> =        A       .*      B
56         //      ------------------------------------------
57         //      sparse  bitmap      sparse          bitmap
58         //      sparse  bitmap      sparse          full
59         //      sparse  bitmap      bitmap          sparse
60         //      sparse  bitmap      full            sparse
61 
62         //      ------------------------------------------
63         //      C      <!M> =        A       .*      B
64         //      ------------------------------------------
65         //      sparse  full        sparse          bitmap
66         //      sparse  full        sparse          full
67         //      sparse  full        bitmap          sparse
68         //      sparse  full        full            sparse
69 
70 #include "GB_ewise.h"
71 #include "GB_emult.h"
72 #include "GB_binop.h"
73 #include "GB_unused.h"
74 #ifndef GBCOMPACT
75 #include "GB_binop__include.h"
76 #endif
77 
78 #define GB_FREE_WORK                        \
79 {                                           \
80     GB_WERK_POP (Work, int64_t) ;           \
81     GB_WERK_POP (A_ek_slicing, int64_t) ;   \
82 }
83 
84 #define GB_FREE_ALL                         \
85 {                                           \
86     GB_FREE_WORK ;                          \
87     GB_phbix_free (C) ;                   \
88 }
89 
GB_emult_02(GrB_Matrix C,const GrB_Type ctype,const bool C_is_csc,const GrB_Matrix M,const bool Mask_struct,const bool Mask_comp,const GrB_Matrix A,const GrB_Matrix B,GrB_BinaryOp op,bool flipxy,GB_Context Context)90 GrB_Info GB_emult_02        // C=A.*B when A is sparse/hyper, B bitmap/full
91 (
92     GrB_Matrix C,           // output matrix, static header
93     const GrB_Type ctype,   // type of output matrix C
94     const bool C_is_csc,    // format of output matrix C
95     const GrB_Matrix M,     // optional mask, unused if NULL
96     const bool Mask_struct, // if true, use the only structure of M
97     const bool Mask_comp,   // if true, use !M
98     const GrB_Matrix A,     // input A matrix (sparse/hyper)
99     const GrB_Matrix B,     // input B matrix (bitmap/full)
100     GrB_BinaryOp op,        // op to perform C = op (A,B)
101     bool flipxy,            // if true use fmult(y,x) else fmult(x,y)
102     GB_Context Context
103 )
104 {
105 
106     //--------------------------------------------------------------------------
107     // check inputs
108     //--------------------------------------------------------------------------
109 
110     GrB_Info info ;
111     ASSERT (C != NULL && C->static_header) ;
112 
113     ASSERT_MATRIX_OK_OR_NULL (M, "M for emult_02", GB0) ;
114     ASSERT_MATRIX_OK (A, "A for emult_02", GB0) ;
115     ASSERT_MATRIX_OK (B, "B for emult_02", GB0) ;
116     ASSERT_BINARYOP_OK (op, "op for emult_02", GB0) ;
117     ASSERT_TYPE_OK (ctype, "ctype for emult_02", GB0) ;
118 
119     ASSERT (GB_IS_SPARSE (A) || GB_IS_HYPERSPARSE (A)) ;
120     ASSERT (!GB_PENDING (A)) ;
121     ASSERT (GB_JUMBLED_OK (A)) ;
122     ASSERT (!GB_ZOMBIES (A)) ;
123     ASSERT (GB_IS_BITMAP (B) || GB_IS_FULL (B)) ;
124     ASSERT (M == NULL || GB_IS_BITMAP (B) || GB_IS_FULL (B)) ;
125 
126     int C_sparsity = GB_sparsity (A) ;
127 
128     if (M == NULL)
129     {
130         GBURBLE ("emult_02:(%s=%s.*%s)",
131             GB_sparsity_char (C_sparsity),
132             GB_sparsity_char_matrix (A),
133             GB_sparsity_char_matrix (B)) ;
134     }
135     else
136     {
137         GBURBLE ("emult_02:(%s<%s%s%s>=%s.*%s) ",
138             GB_sparsity_char (C_sparsity),
139             Mask_comp ? "!" : "",
140             GB_sparsity_char_matrix (M),
141             Mask_struct ? ",struct" : "",
142             GB_sparsity_char_matrix (A),
143             GB_sparsity_char_matrix (B)) ;
144     }
145 
146     //--------------------------------------------------------------------------
147     // declare workspace
148     //--------------------------------------------------------------------------
149 
150     GB_WERK_DECLARE (Work, int64_t) ;
151     int64_t *restrict Wfirst    = NULL ;
152     int64_t *restrict Wlast     = NULL ;
153     int64_t *restrict Cp_kfirst = NULL ;
154     GB_WERK_DECLARE (A_ek_slicing, int64_t) ;
155 
156     //--------------------------------------------------------------------------
157     // get M, A, and B
158     //--------------------------------------------------------------------------
159 
160     const int8_t  *restrict Mb = (M == NULL) ? NULL : M->b ;
161     const GB_void *restrict Mx = (M == NULL || Mask_struct) ? NULL :
162         (const GB_void *) M->x ;
163     const size_t msize = (M == NULL) ? 0 : M->type->size ;
164 
165     const int64_t *restrict Ap = A->p ;
166     const int64_t *restrict Ah = A->h ;
167     const int64_t *restrict Ai = A->i ;
168     const int64_t vlen = A->vlen ;
169     const int64_t vdim = A->vdim ;
170     const int64_t nvec = A->nvec ;
171     const int64_t anz = GB_NNZ (A) ;
172 
173     const int8_t *restrict Bb = B->b ;
174     const bool B_is_bitmap = GB_IS_BITMAP (B) ;
175 
176     //--------------------------------------------------------------------------
177     // allocate C->p and C->h
178     //--------------------------------------------------------------------------
179 
180     GB_OK (GB_new (&C, true, // sparse or hyper (same as A), static header
181         ctype, vlen, vdim, GB_Ap_calloc, C_is_csc,
182         C_sparsity, A->hyper_switch, nvec, Context)) ;
183     int64_t *restrict Cp = C->p ;
184 
185     //--------------------------------------------------------------------------
186     // slice the input matrix A
187     //--------------------------------------------------------------------------
188 
189     int A_nthreads, A_ntasks ;
190     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
191     GB_SLICE_MATRIX (A, 8, chunk) ;
192 
193     //--------------------------------------------------------------------------
194     // count entries in C
195     //--------------------------------------------------------------------------
196 
197     C->nvec_nonempty = A->nvec_nonempty ;
198     C->nvec = nvec ;
199     const bool C_has_pattern_of_A = !B_is_bitmap && (M == NULL) ;
200 
201     if (!C_has_pattern_of_A)
202     {
203 
204         //----------------------------------------------------------------------
205         // allocate workspace
206         //----------------------------------------------------------------------
207 
208         GB_WERK_PUSH (Work, 3*A_ntasks, int64_t) ;
209         if (Work == NULL)
210         {
211             // out of memory
212             GB_FREE_ALL ;
213             return (GrB_OUT_OF_MEMORY) ;
214         }
215         Wfirst    = Work ;
216         Wlast     = Work + A_ntasks ;
217         Cp_kfirst = Work + A_ntasks * 2 ;
218 
219         //----------------------------------------------------------------------
220         // count entries in C
221         //----------------------------------------------------------------------
222 
223         // This phase is very similar to GB_select_phase1 (GB_ENTRY_SELECTOR).
224 
225         if (M == NULL)
226         {
227 
228             //------------------------------------------------------------------
229             // C = A.*B where A is sparse/hyper and B is bitmap
230             //------------------------------------------------------------------
231 
232             ASSERT (B_is_bitmap) ;
233 
234             int tid ;
235             #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1)
236             for (tid = 0 ; tid < A_ntasks ; tid++)
237             {
238                 int64_t kfirst = kfirst_Aslice [tid] ;
239                 int64_t klast  = klast_Aslice  [tid] ;
240                 Wfirst [tid] = 0 ;
241                 Wlast  [tid] = 0 ;
242                 for (int64_t k = kfirst ; k <= klast ; k++)
243                 {
244                     // count the entries in C(:,j)
245                     int64_t j = GBH (Ah, k) ;
246                     int64_t pB_start = j * vlen ;
247                     int64_t pA, pA_end ;
248                     GB_get_pA (&pA, &pA_end, tid, k,
249                         kfirst, klast, pstart_Aslice, Ap, vlen) ;
250                     int64_t cjnz = 0 ;
251                     for ( ; pA < pA_end ; pA++)
252                     {
253                         cjnz += Bb [pB_start + Ai [pA]] ;
254                     }
255                     if (k == kfirst)
256                     {
257                         Wfirst [tid] = cjnz ;
258                     }
259                     else if (k == klast)
260                     {
261                         Wlast [tid] = cjnz ;
262                     }
263                     else
264                     {
265                         Cp [k] = cjnz ;
266                     }
267                 }
268             }
269 
270         }
271         else
272         {
273 
274             //------------------------------------------------------------------
275             // C<#M> = A.*B where M and B are bitmap/full, A is sparse/hyper
276             //------------------------------------------------------------------
277 
278             ASSERT (M != NULL) ;
279 
280             int tid ;
281             #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1)
282             for (tid = 0 ; tid < A_ntasks ; tid++)
283             {
284                 int64_t kfirst = kfirst_Aslice [tid] ;
285                 int64_t klast  = klast_Aslice  [tid] ;
286                 Wfirst [tid] = 0 ;
287                 Wlast  [tid] = 0 ;
288                 for (int64_t k = kfirst ; k <= klast ; k++)
289                 {
290                     // count the entries in C(:,j)
291                     int64_t j = GBH (Ah, k) ;
292                     int64_t pB_start = j * vlen ;
293                     int64_t pA, pA_end ;
294                     GB_get_pA (&pA, &pA_end, tid, k,
295                         kfirst, klast, pstart_Aslice, Ap, vlen) ;
296                     int64_t cjnz = 0 ;
297                     for ( ; pA < pA_end ; pA++)
298                     {
299                         int64_t i = Ai [pA] ;
300                         int64_t pB = pB_start + i ;
301                         bool mij = GBB (Mb, pB) && GB_mcast (Mx, pB, msize) ;
302                         mij = mij ^ Mask_comp ;
303                         cjnz += (mij && GBB (Bb, pB)) ;
304                     }
305                     if (k == kfirst)
306                     {
307                         Wfirst [tid] = cjnz ;
308                     }
309                     else if (k == klast)
310                     {
311                         Wlast [tid] = cjnz ;
312                     }
313                     else
314                     {
315                         Cp [k] = cjnz ;
316                     }
317                 }
318             }
319         }
320 
321         //----------------------------------------------------------------------
322         // finalize Cp, cumulative sum of Cp and compute Cp_kfirst
323         //----------------------------------------------------------------------
324 
325         GB_ek_slice_merge1 (Cp, Wfirst, Wlast, A_ek_slicing, A_ntasks) ;
326         GB_ek_slice_merge2 (&(C->nvec_nonempty), Cp_kfirst, Cp, nvec,
327             Wfirst, Wlast, A_ek_slicing, A_ntasks, A_nthreads, Context) ;
328     }
329 
330     //--------------------------------------------------------------------------
331     // allocate C->i and C->x
332     //--------------------------------------------------------------------------
333 
334     int64_t cnz = (C_has_pattern_of_A) ? anz : Cp [nvec] ;
335     GB_OK (GB_bix_alloc (C, cnz, false, false, true, true, Context)) ;
336 
337     //--------------------------------------------------------------------------
338     // copy pattern into C
339     //--------------------------------------------------------------------------
340 
341     // TODO: could make these components of C shallow instead of memcpy
342 
343     if (GB_IS_HYPERSPARSE (A))
344     {
345         // copy A->h into C->h
346         GB_memcpy (C->h, Ah, nvec * sizeof (int64_t), A_nthreads) ;
347     }
348 
349     if (C_has_pattern_of_A)
350     {
351         // B is full and no mask present, so the pattern of C is the same as
352         // the pattern of A
353         GB_memcpy (Cp, Ap, (nvec+1) * sizeof (int64_t), A_nthreads) ;
354         GB_memcpy (C->i, Ai, cnz * sizeof (int64_t), A_nthreads) ;
355     }
356 
357     C->jumbled = A->jumbled ;
358     C->magic = GB_MAGIC ;
359 
360     //--------------------------------------------------------------------------
361     // special case for the ANY operator
362     //--------------------------------------------------------------------------
363 
364     // Replace the ANY operator with SECOND.  ANY and SECOND give the same
365     // result if flipxy is false.  However, SECOND is changed to FIRST if
366     // flipxy is true.  This ensures that the results do not depend on the
367     // sparsity structures of A and B.
368 
369     if (op->opcode == GB_ANY_opcode)
370     {
371         switch (op->xtype->code)
372         {
373             case GB_BOOL_code   : op = GrB_SECOND_BOOL   ; break ;
374             case GB_INT8_code   : op = GrB_SECOND_INT8   ; break ;
375             case GB_INT16_code  : op = GrB_SECOND_INT16  ; break ;
376             case GB_INT32_code  : op = GrB_SECOND_INT32  ; break ;
377             case GB_INT64_code  : op = GrB_SECOND_INT64  ; break ;
378             case GB_UINT8_code  : op = GrB_SECOND_UINT8  ; break ;
379             case GB_UINT16_code : op = GrB_SECOND_UINT16 ; break ;
380             case GB_UINT32_code : op = GrB_SECOND_UINT32 ; break ;
381             case GB_UINT64_code : op = GrB_SECOND_UINT64 ; break ;
382             case GB_FP32_code   : op = GrB_SECOND_FP32   ; break ;
383             case GB_FP64_code   : op = GrB_SECOND_FP64   ; break ;
384             case GB_FC32_code   : op = GxB_SECOND_FC32   ; break ;
385             case GB_FC64_code   : op = GxB_SECOND_FC64   ; break ;
386             default: ;
387         }
388     }
389 
390     //--------------------------------------------------------------------------
391     // handle flipxy
392     //--------------------------------------------------------------------------
393 
394     if (flipxy)
395     {
396         bool handled ;
397         op = GB_flip_op (op, &handled) ;
398         if (handled) flipxy = false ;
399     }
400     ASSERT_BINARYOP_OK (op, "final op for emult_02", GB0) ;
401 
402     //--------------------------------------------------------------------------
403     // get the opcode
404     //--------------------------------------------------------------------------
405 
406     // if flipxy was true on input and the op is positional, FIRST, SECOND, or
407     // PAIR, the op has already been flipped, so these tests do not have to
408     // consider that case.
409 
410     GB_Opcode opcode = op->opcode ;
411     bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
412     bool op_is_first  = (opcode == GB_FIRST_opcode) ;
413     bool op_is_second = (opcode == GB_SECOND_opcode) ;
414     bool op_is_pair   = (opcode == GB_PAIR_opcode) ;
415     GB_Type_code ccode = ctype->code ;
416 
417     //--------------------------------------------------------------------------
418     // check if the values of A and/or B are ignored
419     //--------------------------------------------------------------------------
420 
421     // With C = ewisemult (A,B), only the intersection of A and B is used.
422     // If op is SECOND or PAIR, the values of A are never accessed.
423     // If op is FIRST  or PAIR, the values of B are never accessed.
424     // If op is PAIR, the values of A and B are never accessed.
425     // Contrast with ewiseadd.
426 
427     // A is passed as x, and B as y, in z = op(x,y)
428     bool A_is_pattern = op_is_second || op_is_pair || op_is_positional ;
429     bool B_is_pattern = op_is_first  || op_is_pair || op_is_positional ;
430 
431     //--------------------------------------------------------------------------
432     // using a built-in binary operator (except for positional operators)
433     //--------------------------------------------------------------------------
434 
435     bool done = false ;
436 
437     #ifndef GBCOMPACT
438 
439         //----------------------------------------------------------------------
440         // define the worker for the switch factory
441         //----------------------------------------------------------------------
442 
443         #define GB_AemultB_02(mult,xname) GB (_AemultB_02_ ## mult ## xname)
444 
445         #define GB_BINOP_WORKER(mult,xname)                             \
446         {                                                               \
447             info = GB_AemultB_02(mult,xname) (C,                        \
448                 M, Mask_struct, Mask_comp, A, B, flipxy,                \
449                 Cp_kfirst, A_ek_slicing, A_ntasks, A_nthreads) ;        \
450             done = (info != GrB_NO_VALUE) ;                             \
451         }                                                               \
452         break ;
453 
454         //----------------------------------------------------------------------
455         // launch the switch factory
456         //----------------------------------------------------------------------
457 
458         GB_Type_code xcode, ycode, zcode ;
459         if (!op_is_positional &&
460             GB_binop_builtin (A->type, A_is_pattern, B->type, B_is_pattern,
461             op, false, &opcode, &xcode, &ycode, &zcode) && ccode == zcode)
462         {
463             #include "GB_binop_factory.c"
464         }
465 
466     #endif
467 
468     //--------------------------------------------------------------------------
469     // generic worker
470     //--------------------------------------------------------------------------
471 
472     if (!done)
473     {
474         GB_BURBLE_MATRIX (C, "(generic emult_02: %s) ", op->name) ;
475         int ewise_method = flipxy ? GB_EMULT_METHOD_02B : GB_EMULT_METHOD_02A ;
476         GB_ewise_generic (C, op, NULL, 0, 0,
477             NULL, NULL, NULL, C_sparsity, ewise_method, Cp_kfirst,
478             NULL, 0, 0, A_ek_slicing, A_ntasks, A_nthreads, NULL, 0, 0,
479             M, Mask_struct, Mask_comp, A, B, Context) ;
480     }
481 
482     //--------------------------------------------------------------------------
483     // remove empty vectors from C, if hypersparse
484     //--------------------------------------------------------------------------
485 
486     GB_OK (GB_hypermatrix_prune (C, Context)) ;
487 
488     //--------------------------------------------------------------------------
489     // free workspace and return result
490     //--------------------------------------------------------------------------
491 
492     GB_FREE_WORK ;
493     ASSERT_MATRIX_OK (C, "C output for emult_02", GB0) ;
494     return (GrB_SUCCESS) ;
495 }
496 
497