1 //------------------------------------------------------------------------------
2 // GB_dense_subassign_23: C += B where C is dense and B is sparse or dense
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // C and B must have the same vector dimension and vector length.
11 // FUTURE::: the transposed case, C+=B' could easily be done.
12 // The parallelism used is identical to GB_AxB_colscale.
13 
14 // The type of C must match the type of x and z for the accum function, since
15 // C(i,j) = accum (C(i,j), B(i,j)) is handled.  The generic case here can
16 // typecast B(i,j) but not C(i,j).  The case for typecasting of C is handled by
17 // Method 04.
18 
19 // The caller passes in the second matrix as A, but it is called B here to
20 // match its use as the 2nd input to the binary accum operator.  C and B can
21 // have any sparsity structure, but C must be dense.
22 
23 #include "GB_dense.h"
24 #include "GB_binop.h"
25 #ifndef GBCOMPACT
26 #include "GB_binop__include.h"
27 #endif
28 
29 #define GB_FREE_ALL                         \
30 {                                           \
31     GB_WERK_POP (B_ek_slicing, int64_t) ;   \
32 }
33 
GB_dense_subassign_23(GrB_Matrix C,const GrB_Matrix B,const GrB_BinaryOp accum,GB_Context Context)34 GrB_Info GB_dense_subassign_23      // C += B; C is dense, B is sparse or dense
35 (
36     GrB_Matrix C,                   // input/output matrix
37     const GrB_Matrix B,             // input matrix
38     const GrB_BinaryOp accum,       // operator to apply
39     GB_Context Context
40 )
41 {
42 
43     //--------------------------------------------------------------------------
44     // check inputs
45     //--------------------------------------------------------------------------
46 
47     ASSERT (!GB_aliased (C, B)) ;   // NO ALIAS of C==A (A is called B here)
48 
49     //--------------------------------------------------------------------------
50     // check inputs
51     //--------------------------------------------------------------------------
52 
53     GrB_Info info ;
54     ASSERT_MATRIX_OK (C, "C for C+=B", GB0) ;
55     ASSERT (!GB_PENDING (C)) ;
56     ASSERT (!GB_JUMBLED (C)) ;
57     ASSERT (!GB_ZOMBIES (C)) ;
58     ASSERT (GB_is_dense (C)) ;
59 
60     ASSERT_MATRIX_OK (B, "B for C+=B", GB0) ;
61     ASSERT (!GB_PENDING (B)) ;
62     ASSERT (GB_JUMBLED_OK (B)) ;
63     ASSERT (!GB_ZOMBIES (B)) ;
64 
65     ASSERT_BINARYOP_OK (accum, "accum for C+=B", GB0) ;
66     ASSERT (!GB_OP_IS_POSITIONAL (accum)) ;
67     ASSERT (B->vlen == C->vlen) ;
68     ASSERT (B->vdim == C->vdim) ;
69 
70     GB_ENSURE_FULL (C) ;        // convert C to full
71 
72     //--------------------------------------------------------------------------
73     // get the operator
74     //--------------------------------------------------------------------------
75 
76     if (accum->opcode == GB_FIRST_opcode)
77     {
78         // nothing to do
79         return (GrB_SUCCESS) ;
80     }
81 
82     // C = accum (C,B) will be computed
83     ASSERT (C->type == accum->ztype) ;
84     ASSERT (C->type == accum->xtype) ;
85     ASSERT (GB_Type_compatible (B->type, accum->ytype)) ;
86 
87     //--------------------------------------------------------------------------
88     // determine the number of threads to use
89     //--------------------------------------------------------------------------
90 
91     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
92 
93     //--------------------------------------------------------------------------
94     // slice the entries for each task
95     //--------------------------------------------------------------------------
96 
97     GB_WERK_DECLARE (B_ek_slicing, int64_t) ;
98     int B_ntasks, B_nthreads ;
99 
100     if (GB_is_packed (B))
101     {
102         // C is dense and B is either dense or bitmap
103         GBURBLE ("(Z packed) ") ;
104         int64_t bnvec = B->nvec ;
105         int64_t bnz = GB_NNZ_HELD (B) ;
106         B_nthreads = GB_nthreads (bnz + bnvec, chunk, nthreads_max) ;
107         B_ntasks = 0 ;   // unused
108         ASSERT (B_ek_slicing == NULL) ;
109     }
110     else
111     {
112         // create tasks to compute over the matrix B
113         GB_SLICE_MATRIX (B, 32, chunk) ;
114         ASSERT (B_ek_slicing != NULL) ;
115     }
116 
117     //--------------------------------------------------------------------------
118     // C += B, sparse accum into dense, with built-in binary operators
119     //--------------------------------------------------------------------------
120 
121     bool done = false ;
122 
123     #ifndef GBCOMPACT
124 
125         //----------------------------------------------------------------------
126         // define the worker for the switch factory
127         //----------------------------------------------------------------------
128 
129         #define GB_Cdense_accumB(accum,xname) \
130             GB (_Cdense_accumB_ ## accum ## xname)
131 
132         #define GB_BINOP_WORKER(accum,xname)                    \
133         {                                                       \
134             info = GB_Cdense_accumB(accum,xname) (C, B,         \
135                 B_ek_slicing, B_ntasks, B_nthreads) ;           \
136             done = (info != GrB_NO_VALUE) ;                     \
137         }                                                       \
138         break ;
139 
140         //----------------------------------------------------------------------
141         // launch the switch factory
142         //----------------------------------------------------------------------
143 
144         GB_Opcode opcode ;
145         GB_Type_code xcode, ycode, zcode ;
146         if (GB_binop_builtin (C->type, false, B->type, false, // C = C + B
147             accum, false, &opcode, &xcode, &ycode, &zcode))
148         {
149             // accumulate sparse matrix into dense matrix with built-in operator
150             #include "GB_binop_factory.c"
151         }
152 
153     #endif
154 
155     //--------------------------------------------------------------------------
156     // C += B, sparse accum into dense, with typecasting or user-defined op
157     //--------------------------------------------------------------------------
158 
159     if (!done)
160     {
161 
162         //----------------------------------------------------------------------
163         // get operators, functions, workspace, contents of B and C
164         //----------------------------------------------------------------------
165 
166         GB_BURBLE_MATRIX (B, "(generic C+=B) ") ;
167 
168         GxB_binary_function fadd = accum->function ;
169 
170         size_t csize = C->type->size ;
171         size_t bsize = B->type->size ;
172         size_t ysize = accum->ytype->size ;
173 
174         GB_cast_function cast_B_to_Y ;
175 
176         // B is typecasted to y
177         cast_B_to_Y = GB_cast_factory (accum->ytype->code, B->type->code) ;
178 
179         //----------------------------------------------------------------------
180         // C += B via function pointers, and typecasting
181         //----------------------------------------------------------------------
182 
183         // bij = B(i,j), located in Bx [pB].  Note that GB_GETB is used,
184         // since B appears as the 2nd input to z = fadd (x,y)
185         #define GB_GETB(bij,Bx,pB)                                          \
186             GB_void bij [GB_VLA(ysize)] ;                                   \
187             cast_B_to_Y (bij, Bx +((pB)*bsize), bsize)
188 
189         // address of Cx [p]
190         #define GB_CX(p) Cx +((p)*csize)
191 
192         #define GB_BTYPE GB_void
193         #define GB_CTYPE GB_void
194 
195         // no vectorization
196         #define GB_PRAGMA_SIMD_VECTORIZE ;
197 
198         #define GB_BINOP(z,x,y,i,j) fadd (z,x,y)
199         #include "GB_dense_subassign_23_template.c"
200     }
201 
202     //--------------------------------------------------------------------------
203     // free workspace and return result
204     //--------------------------------------------------------------------------
205 
206     GB_FREE_ALL ;
207     ASSERT_MATRIX_OK (C, "C+=B output", GB0) ;
208     return (GrB_SUCCESS) ;
209 }
210 
211