//------------------------------------------------------------------------------
// GB_AxB_dot: C<M>=A'*B using dot products
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// Parallel matrix-matrix multiply, A'*B, with optional mask M.  This
// method is used by GrB_mxm, GrB_vxm, and GrB_mxv.  For the latter two
// methods, B on input will be an nrows-by-1 column vector.

// This function, and the matrices C, M, A, and B are all CSR/CSC agnostic.
// For this discussion, suppose they are CSC, with vlen = # of rows, and vdim =
// # of columns.

// C=A'*B, C<M>=A'*B, or C<!M>=A'*B is being computed.  A has not been
// transposed yet (and will not be).  A and B must have the same vector length,
// vlen (as if both A and B are CSC matrices with the same number of rows, for
// example).  GB_AxB_dot2 and GB_AxB_dot3 operate on A' without forming it.
// GB_AxB_dot2 computes C=A'*B, C<M>=A'*B, and C<!M>=A'*B, and it takes
// Omega(m*n) time if C is m-by-n.  It is thus suitable only for cases where A
// and B are large and C is small.  GB_AxB_dot3 computes C<M>=A'*B, and it only
// needs to examine entries in M, taking Omega(nnz(M)) time.  It can thus be
// used for very large matrices C.  GB_AxB_dot4 computes C+=A'*B when C is
// dense.
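// For illustration only (a hedged sketch, not part of this file's logic): a
// user-level call such as
//
//      GrB_mxm (C, M, NULL, GrB_PLUS_TIMES_SEMIRING_FP64, A, B, desc) ;
//
// with the descriptor desc setting GrB_INP0 to GrB_TRAN can reach this
// method.  With a sparse, non-complemented mask M it is typically handled by
// GB_AxB_dot3; with no mask (and a small C) by GB_AxB_dot2.
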
// The output matrix C has not been allocated.  It is an uninitialized static
// header on input.  The mask M is optional.

// If the result is computed in-place, then the C parameter is ignored, and the
// result is computed in C_in instead.  This case requires the accum operator
// to match the monoid of the semiring.

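// For example (illustrative only): GrB_mxm (C, NULL, GrB_PLUS_FP64,
// GrB_PLUS_TIMES_SEMIRING_FP64, A, B, desc) accumulates with GrB_PLUS_FP64,
// which matches the PLUS monoid of the semiring, so C+=A'*B may be computed
// in-place in C_in by GB_AxB_dot4 when C is dense.
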
// The semiring defines C=A*B.  flipxy modifies how the semiring multiply
// operator is applied.  If false, then fmult(aik,bkj) is computed.  If true,
// then the operands are swapped, and fmult(bkj,aik) is done instead.

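// For example (an illustrative sketch): with fmult = GrB_MINUS_FP64, each dot
// product term is aik-bkj when flipxy is false, and bkj-aik when flipxy is
// true.  flipxy matters only when the multiply operator is not commutative.
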
// Context: the GB_Context containing the # of threads to use, the name of the
// user-callable function that is calling this function (GrB_mxm, GrB_vxm, or
// GrB_mxv), and detailed error reports.

#include "GB_mxm.h"
#define GB_FREE_ALL ;

GrB_Info GB_AxB_dot                 // dot product (multiple methods)
(
    GrB_Matrix C,                   // output matrix, static header
    GrB_Matrix C_in,                // input/output matrix, if done in-place
    GrB_Matrix M,                   // optional mask matrix
    const bool Mask_comp,           // if true, use !M
    const bool Mask_struct,         // if true, use only the structure of M
    const GrB_Matrix A,             // input matrix A
    const GrB_Matrix B,             // input matrix B
    const GrB_Semiring semiring,    // semiring that defines C=A*B
    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
    bool *mask_applied,             // if true, mask was applied
    bool *done_in_place,            // if true, C_in was computed in-place
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (C != NULL && C->static_header) ;

    ASSERT_MATRIX_OK_OR_NULL (M, "M for dot A'*B", GB0) ;
    ASSERT (!GB_PENDING (M)) ;
    ASSERT (GB_JUMBLED_OK (M)) ;
    ASSERT (!GB_ZOMBIES (M)) ;

    ASSERT_MATRIX_OK (A, "A for dot A'*B", GB0) ;
    GB_MATRIX_WAIT (A) ;
    ASSERT (!GB_PENDING (A)) ;
    ASSERT (!GB_JUMBLED (A)) ;
    ASSERT (!GB_ZOMBIES (A)) ;

    ASSERT_MATRIX_OK (B, "B for dot A'*B", GB0) ;
    GB_MATRIX_WAIT (B) ;
    ASSERT (!GB_PENDING (B)) ;
    ASSERT (!GB_JUMBLED (B)) ;
    ASSERT (!GB_ZOMBIES (B)) ;

    ASSERT_SEMIRING_OK (semiring, "semiring for dot A'*B", GB0) ;

    //--------------------------------------------------------------------------
    // in-place C+=A'*B.  mask is not present (and not applied)
    //--------------------------------------------------------------------------

    if (GB_AxB_dot4_control (C_in, M, Mask_comp))
    {
        (*done_in_place) = true ;
        (*mask_applied) = false ;    // no mask to apply
        return (GB_AxB_dot4 (C_in, A, B, semiring, flipxy, Context)) ;
    }

    //--------------------------------------------------------------------------
    // check the empty case
    //--------------------------------------------------------------------------

    if (A->vlen == 0)
    {
        // no work to do; C is an empty matrix, normally hypersparse
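        // (if A->vlen is zero, every dot product is over a vector of length
        // zero, so C = A'*B is an (A->vdim)-by-(B->vdim) matrix with no
        // entries)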
        if (C_in != NULL) return (GrB_SUCCESS) ;
        return (GB_new (&C, true, // auto sparsity, static header
            semiring->add->op->ztype, A->vdim, B->vdim, GB_Ap_calloc, true,
            GxB_AUTO_SPARSITY, GB_Global_hyper_switch_get ( ), 1, Context)) ;
    }

    //--------------------------------------------------------------------------
    // C<M>=A'*B: general case
    //--------------------------------------------------------------------------

    if (GB_AxB_dot3_control (M, Mask_comp))
    {

        // use dot3 if M is present and not complemented, and either sparse or
        // hypersparse
        GBURBLE ("dot3 ") ;
        (*mask_applied) = true ;    // mask is always applied
        (*done_in_place) = false ;

        #if defined ( GBCUDA )

// [ replace this with:
// if (GB_AxB_dot3_cuda_branch (M, Mask_struct, A, B, semiring, flipxy, Context)

        // very rough estimate of the work to do
        int64_t anz = GB_IS_FULL (A) ? GB_NNZ_FULL (A) : GB_NNZ (A) ;
        int64_t bnz = GB_IS_FULL (B) ? GB_NNZ_FULL (B) : GB_NNZ (B) ;
        int64_t mnz = GB_NNZ (M) ;

        double adeg = ((double) anz) / ((double) GB_IMAX (1, A->nvec)) ;
        double bdeg = ((double) bnz) / ((double) GB_IMAX (1, B->nvec)) ;
        double work = mnz * GB_IMIN (adeg, bdeg) ;
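        // (adeg and bdeg are the average # of entries per vector of A and B;
        // each of the mnz dot products scans roughly the shorter of the two
        // vectors it multiplies, giving about mnz * min(adeg,bdeg) work)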

        // TODO for GPU: if A or B are not accessed (first, 2nd, or pair
        // ops) then the type of A can be user-defined here, for CUDA.

        int ngpus_to_use = GB_ngpus_to_use (work) ;
        GBURBLE (" work:%g gpus:%d ", work, ngpus_to_use) ;
        if (ngpus_to_use > 0
            && (semiring->header_size == 0)     // semiring is built-in
            && (A->type->code != GB_UDT_code)
            && (B->type->code != GB_UDT_code)
            && !GB_IS_BITMAP (A) && !GB_IS_BITMAP (B))
// to here ... ]
        {
            // use "the" GPU (TODO for GPU: could use multiple GPUs too)
            return (GB_AxB_dot3_cuda (C, M, Mask_struct, A, B, semiring,
                flipxy, Context)) ;
        }
        else
        #endif
        {
            // use the CPU
            return (GB_AxB_dot3 (C, M, Mask_struct, A, B, semiring,
                flipxy, Context)) ;
        }
    }

    //--------------------------------------------------------------------------
    // general case: C<M>=A'*B, C<!M>=A'*B, or C=A'*B, not in-place
    //--------------------------------------------------------------------------

    (*mask_applied) = (M != NULL) ; // mask applied if present
    (*done_in_place) = false ;      // TODO: allow dot2 to work in-place
    return (GB_AxB_dot2 (C, M, Mask_comp, Mask_struct, A, B, semiring,
        flipxy, Context)) ;
}
