1 //------------------------------------------------------------------------------
2 // GB_emult_03: C<M>= A.*B, M sparse/hyper, A and B bitmap/full
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // C<M>= A.*B, M sparse/hyper, A and B bitmap/full.  C has the same sparsity
11 // structure as M, and its pattern is a subset of M.
12 
13             //      ------------------------------------------
14             //      C       <M>=        A       .*      B
15             //      ------------------------------------------
16             //      sparse  sparse      bitmap          bitmap  (method: 03)
17             //      sparse  sparse      bitmap          full    (method: 03)
18             //      sparse  sparse      full            bitmap  (method: 03)
19             //      sparse  sparse      full            full    (method: 03)
20 
21 // TODO: this function can also do eWiseAdd, just as easily.
22 // Just change the "&&" to "||" in the GB_emult_03_template.
23 // If A and B are both full, eadd and emult are identical.
24 
25 #include "GB_ewise.h"
26 #include "GB_emult.h"
27 #include "GB_binop.h"
28 #include "GB_unused.h"
29 #ifndef GBCOMPACT
30 #include "GB_binop__include.h"
31 #endif
32 
33 #define GB_FREE_WORK                        \
34 {                                           \
35     GB_WERK_POP (Work, int64_t) ;           \
36     GB_WERK_POP (M_ek_slicing, int64_t) ;   \
37 }
38 
39 #define GB_FREE_ALL                         \
40 {                                           \
41     GB_FREE_WORK ;                          \
42     GB_phbix_free (C) ;                   \
43 }
44 
GB_emult_03(GrB_Matrix C,const GrB_Type ctype,const bool C_is_csc,const GrB_Matrix M,const bool Mask_struct,bool * mask_applied,const GrB_Matrix A,const GrB_Matrix B,const GrB_BinaryOp op,GB_Context Context)45 GrB_Info GB_emult_03        // C<M>=A.*B, M sparse/hyper, A and B bitmap/full
46 (
47     GrB_Matrix C,           // output matrix, static header
48     const GrB_Type ctype,   // type of output matrix C
49     const bool C_is_csc,    // format of output matrix C
50     const GrB_Matrix M,     // sparse/hyper, not NULL
51     const bool Mask_struct, // if true, use the only structure of M
52     bool *mask_applied,     // if true, the mask was applied
53     const GrB_Matrix A,     // input A matrix (bitmap/full)
54     const GrB_Matrix B,     // input B matrix (bitmap/full)
55     const GrB_BinaryOp op,  // op to perform C = op (A,B)
56     GB_Context Context
57 )
58 {
59 
60     //--------------------------------------------------------------------------
61     // check inputs
62     //--------------------------------------------------------------------------
63 
64     GrB_Info info ;
65     ASSERT (C != NULL && C->static_header) ;
66 
67     ASSERT_MATRIX_OK (M, "M for emult_03", GB0) ;
68     ASSERT_MATRIX_OK (A, "A for emult_03", GB0) ;
69     ASSERT_MATRIX_OK (B, "B for emult_03", GB0) ;
70     ASSERT_BINARYOP_OK (op, "op for emult_03", GB0) ;
71 
72     ASSERT (GB_IS_SPARSE (M) || GB_IS_HYPERSPARSE (M)) ;
73     ASSERT (!GB_PENDING (M)) ;
74     ASSERT (GB_JUMBLED_OK (M)) ;
75     ASSERT (!GB_ZOMBIES (M)) ;
76     ASSERT (GB_IS_BITMAP (A) || GB_IS_FULL (A) || GB_as_if_full (A)) ;
77     ASSERT (GB_IS_BITMAP (B) || GB_IS_FULL (B) || GB_as_if_full (B)) ;
78 
79     int C_sparsity = GB_sparsity (M) ;
80 
81     GBURBLE ("emult_03:(%s<%s>=%s.*%s) ",
82         GB_sparsity_char (C_sparsity),
83         GB_sparsity_char_matrix (M),
84         GB_sparsity_char_matrix (A),
85         GB_sparsity_char_matrix (B)) ;
86 
87     //--------------------------------------------------------------------------
88     // declare workspace
89     //--------------------------------------------------------------------------
90 
91     GB_WERK_DECLARE (Work, int64_t) ;
92     int64_t *restrict Wfirst = NULL ;
93     int64_t *restrict Wlast = NULL ;
94     int64_t *restrict Cp_kfirst = NULL ;
95     GB_WERK_DECLARE (M_ek_slicing, int64_t) ;
96 
97     //--------------------------------------------------------------------------
98     // get M, A, and B
99     //--------------------------------------------------------------------------
100 
101     const int64_t *restrict Mp = M->p ;
102     const int64_t *restrict Mh = M->h ;
103     const int64_t *restrict Mi = M->i ;
104     const GB_void *restrict Mx = (Mask_struct) ? NULL : (GB_void *) M->x ;
105     const int64_t vlen = M->vlen ;
106     const int64_t vdim = M->vdim ;
107     const int64_t nvec = M->nvec ;
108     const int64_t mnz = GB_NNZ (M) ;
109     const size_t  msize = M->type->size ;
110 
111     const int8_t *restrict Ab = A->b ;
112     const int8_t *restrict Bb = B->b ;
113 
114     //--------------------------------------------------------------------------
115     // allocate C->p and C->h
116     //--------------------------------------------------------------------------
117 
118     GB_OK (GB_new (&C, true,  // sparse or hyper (same as M), static header
119         ctype, vlen, vdim, GB_Ap_calloc, C_is_csc,
120         C_sparsity, M->hyper_switch, nvec, Context)) ;
121     int64_t *restrict Cp = C->p ;
122 
123     //--------------------------------------------------------------------------
124     // slice the mask matrix M
125     //--------------------------------------------------------------------------
126 
127     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
128     int M_ntasks, M_nthreads ;
129     GB_SLICE_MATRIX (M, 8, chunk) ;
130 
131     //--------------------------------------------------------------------------
132     // allocate workspace
133     //--------------------------------------------------------------------------
134 
135     GB_WERK_PUSH (Work, 3*M_ntasks, int64_t) ;
136     if (Work == NULL)
137     {
138         // out of memory
139         GB_FREE_ALL ;
140         return (GrB_OUT_OF_MEMORY) ;
141     }
142     Wfirst    = Work ;
143     Wlast     = Work + M_ntasks ;
144     Cp_kfirst = Work + M_ntasks * 2 ;
145 
146     //--------------------------------------------------------------------------
147     // count entries in C
148     //--------------------------------------------------------------------------
149 
150     // This phase is very similar to GB_select_phase1 (GB_ENTRY_SELECTOR).
151 
152     // TODO: if M is structural and A and B are both full, then C has exactly
153     // the same pattern as M, the first phase can be skipped.
154 
155     int tid ;
156     #pragma omp parallel for num_threads(M_nthreads) schedule(dynamic,1)
157     for (tid = 0 ; tid < M_ntasks ; tid++)
158     {
159         int64_t kfirst = kfirst_Mslice [tid] ;
160         int64_t klast  = klast_Mslice  [tid] ;
161         Wfirst [tid] = 0 ;
162         Wlast  [tid] = 0 ;
163         for (int64_t k = kfirst ; k <= klast ; k++)
164         {
165             // count the entries in C(:,j)
166             int64_t j = GBH (Mh, k) ;
167             int64_t pstart = j * vlen ;     // start of A(:,j) and B(:,j)
168             int64_t pM, pM_end ;
169             GB_get_pA (&pM, &pM_end, tid, k,
170                 kfirst, klast, pstart_Mslice, Mp, vlen) ;
171             int64_t cjnz = 0 ;
172             for ( ; pM < pM_end ; pM++)
173             {
174                 bool mij = GB_mcast (Mx, pM, msize) ;
175                 if (mij)
176                 {
177                     int64_t i = Mi [pM] ;
178                     cjnz +=
179                         (GBB (Ab, pstart + i)
180                         &&      // TODO: for GB_add, use || instead
181                         GBB (Bb, pstart + i)) ;
182                 }
183             }
184             if (k == kfirst)
185             {
186                 Wfirst [tid] = cjnz ;
187             }
188             else if (k == klast)
189             {
190                 Wlast [tid] = cjnz ;
191             }
192             else
193             {
194                 Cp [k] = cjnz ;
195             }
196         }
197     }
198 
199     //--------------------------------------------------------------------------
200     // finalize Cp, cumulative sum of Cp and compute Cp_kfirst
201     //--------------------------------------------------------------------------
202 
203     GB_ek_slice_merge1 (Cp, Wfirst, Wlast, M_ek_slicing, M_ntasks) ;
204     GB_ek_slice_merge2 (&(C->nvec_nonempty), Cp_kfirst, Cp, nvec,
205         Wfirst, Wlast, M_ek_slicing, M_ntasks, M_nthreads, Context) ;
206 
207     //--------------------------------------------------------------------------
208     // allocate C->i and C->x
209     //--------------------------------------------------------------------------
210 
211     int64_t cnz = Cp [nvec] ;
212     GB_OK (GB_bix_alloc (C, cnz, false, false, true, true, Context)) ;
213 
214     //--------------------------------------------------------------------------
215     // copy pattern into C
216     //--------------------------------------------------------------------------
217 
218     // TODO: could make these components of C shallow instead
219 
220     if (GB_IS_HYPERSPARSE (M))
221     {
222         // copy M->h into C->h
223         GB_memcpy (C->h, Mh, nvec * sizeof (int64_t), M_nthreads) ;
224     }
225 
226     C->nvec = nvec ;
227     C->jumbled = M->jumbled ;
228     C->magic = GB_MAGIC ;
229 
230     //--------------------------------------------------------------------------
231     // get the opcode
232     //--------------------------------------------------------------------------
233 
234     GB_Opcode opcode = op->opcode ;
235     bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
236     bool op_is_first  = (opcode == GB_FIRST_opcode) ;
237     bool op_is_second = (opcode == GB_SECOND_opcode) ;
238     bool op_is_pair   = (opcode == GB_PAIR_opcode) ;
239     GB_Type_code ccode = ctype->code ;
240 
241     //--------------------------------------------------------------------------
242     // check if the values of A and/or B are ignored
243     //--------------------------------------------------------------------------
244 
245     // With C = ewisemult (A,B), only the intersection of A and B is used.
246     // If op is SECOND or PAIR, the values of A are never accessed.
247     // If op is FIRST  or PAIR, the values of B are never accessed.
248     // If op is PAIR, the values of A and B are never accessed.
249     // Contrast with ewiseadd.
250 
251     // A is passed as x, and B as y, in z = op(x,y)
252     bool A_is_pattern = op_is_second || op_is_pair || op_is_positional ;
253     bool B_is_pattern = op_is_first  || op_is_pair || op_is_positional ;
254 
255     //--------------------------------------------------------------------------
256     // using a built-in binary operator (except for positional operators)
257     //--------------------------------------------------------------------------
258 
259     bool done = false ;
260 
261     #ifndef GBCOMPACT
262 
263         //----------------------------------------------------------------------
264         // define the worker for the switch factory
265         //----------------------------------------------------------------------
266 
267         #define GB_AemultB_03(mult,xname) GB (_AemultB_03_ ## mult ## xname)
268 
269         #define GB_BINOP_WORKER(mult,xname)                                 \
270         {                                                                   \
271             info = GB_AemultB_03(mult,xname) (C, M, Mask_struct, A, B,      \
272                 Cp_kfirst, M_ek_slicing, M_ntasks, M_nthreads) ;            \
273             done = (info != GrB_NO_VALUE) ;                                 \
274         }                                                                   \
275         break ;
276 
277         //----------------------------------------------------------------------
278         // launch the switch factory
279         //----------------------------------------------------------------------
280 
281         GB_Type_code xcode, ycode, zcode ;
282         if (!op_is_positional &&
283             GB_binop_builtin (A->type, A_is_pattern, B->type, B_is_pattern,
284             op, false, &opcode, &xcode, &ycode, &zcode) && ccode == zcode)
285         {
286             #include "GB_binop_factory.c"
287         }
288 
289     #endif
290 
291     //--------------------------------------------------------------------------
292     // generic worker
293     //--------------------------------------------------------------------------
294 
295     if (!done)
296     {
297         GB_BURBLE_MATRIX (C, "(generic emult_03: %s) ", op->name) ;
298         GB_ewise_generic (C, op, NULL, 0, 0,
299             NULL, NULL, NULL, C_sparsity, GB_EMULT_METHOD_03, Cp_kfirst,
300             M_ek_slicing, M_ntasks, M_nthreads, NULL, 0, 0, NULL, 0, 0,
301             M, Mask_struct, false, A, B, Context) ;
302     }
303 
304     //--------------------------------------------------------------------------
305     // remove empty vectors from C, if hypersparse
306     //--------------------------------------------------------------------------
307 
308     GB_OK (GB_hypermatrix_prune (C, Context)) ;
309 
310     //--------------------------------------------------------------------------
311     // free workspace and return result
312     //--------------------------------------------------------------------------
313 
314     GB_FREE_WORK ;
315     ASSERT_MATRIX_OK (C, "C output for emult_03", GB0) ;
316     (*mask_applied) = true ;
317     return (GrB_SUCCESS) ;
318 }
319 
320