//------------------------------------------------------------------------------
// GB_reduce_to_scalar: reduce a matrix to a scalar
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// c = accum (c, reduce_to_scalar(A)), reduce entries in a matrix to a scalar.
// Does the work for GrB_*_reduce_TYPE, both matrix and vector.

// This function does not need to know whether A is hypersparse or not, and
// its result is the same whether A is in CSR or CSC format.

// This function is the only place in all of GraphBLAS where the identity
// value of a monoid is required, but only in one special case: it is the
// value returned in c when A has no entries.  The identity value is also used
// internally, in the parallel methods below, to initialize a scalar value in
// each task.  The methods could be rewritten to avoid the use of the identity
// value.  Since this function requires it anyway, for the special case when
// nvals(A) is zero, the existence of the identity value makes the code a
// little simpler.
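
// For example (an illustrative sketch, not part of this file's logic), the
// user-level call
//
//      double sum = 0 ;
//      GrB_Matrix_reduce_FP64 (&sum, NULL, GrB_PLUS_MONOID_FP64, A, NULL) ;
//
// reaches this function with c pointing to sum, ctype = GrB_FP64,
// accum = NULL, and reduce = GrB_PLUS_MONOID_FP64, so sum becomes the sum of
// all entries in A (or the monoid identity, 0, if A has no entries).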

#include "GB_reduce.h"
#include "GB_binop.h"
#include "GB_atomics.h"
#ifndef GBCOMPACT
#include "GB_red__include.h"
#endif

#define GB_FREE_ALL                 \
{                                   \
    GB_WERK_POP (F, bool) ;         \
    GB_WERK_POP (W, GB_void) ;      \
}

GrB_Info GB_reduce_to_scalar    // s = reduce_to_scalar (A)
(
    void *c,                    // result scalar
    const GrB_Type ctype,       // the type of scalar, c
    const GrB_BinaryOp accum,   // for c = accum(c,s)
    const GrB_Monoid reduce,    // monoid to do the reduction
    const GrB_Matrix A,         // matrix to reduce
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    GrB_Info info ;
    GB_RETURN_IF_NULL_OR_FAULTY (reduce) ;
    GB_RETURN_IF_FAULTY_OR_POSITIONAL (accum) ;
    GB_RETURN_IF_NULL (c) ;
    GB_WERK_DECLARE (W, GB_void) ;
    GB_WERK_DECLARE (F, bool) ;

    ASSERT_TYPE_OK (ctype, "type of scalar c", GB0) ;
    ASSERT_MONOID_OK (reduce, "reduce for reduce_to_scalar", GB0) ;
    ASSERT_BINARYOP_OK_OR_NULL (accum, "accum for reduce_to_scalar", GB0) ;
    ASSERT_MATRIX_OK (A, "A for reduce_to_scalar", GB0) ;

    // check domains and dimensions for c = accum (c,s)
    GrB_Type ztype = reduce->op->ztype ;
    GB_OK (GB_compatible (ctype, NULL, NULL, false, accum, ztype, Context)) ;

    // s = reduce (s,A) must be compatible
    if (!GB_Type_compatible (A->type, ztype))
    {
        return (GrB_DOMAIN_MISMATCH) ;
    }

    //--------------------------------------------------------------------------
    // assemble any pending tuples; zombies are OK
    //--------------------------------------------------------------------------

    GB_MATRIX_WAIT_IF_PENDING (A) ;
    GB_BURBLE_DENSE (A, "(A %s) ") ;

    ASSERT (GB_ZOMBIES_OK (A)) ;
    ASSERT (GB_JUMBLED_OK (A)) ;
    ASSERT (!GB_PENDING (A)) ;

    //--------------------------------------------------------------------------
    // get A
    //--------------------------------------------------------------------------

    int64_t asize = A->type->size ;
    int64_t zsize = ztype->size ;
    int64_t anz = GB_NNZ_HELD (A) ;

    // s = identity
    GB_void s [GB_VLA(zsize)] ;
    memcpy (s, reduce->identity, zsize) ;   // required if nnz(A) is zero
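    // For example, the identity of the PLUS monoid is 0 and the identity of
    // the MIN monoid on floating-point types is +infinity; that identity is
    // the value returned in s when A has no entries.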

    //--------------------------------------------------------------------------
    // s = reduce_to_scalar (A) on the GPU(s) or CPU
    //--------------------------------------------------------------------------

    #if defined ( GBCUDA )
    if (GB_reduce_to_scalar_cuda_branch (reduce, A, Context))
    {

        //----------------------------------------------------------------------
        // use the GPU(s)
        //----------------------------------------------------------------------

        GB_OK (GB_reduce_to_scalar_cuda (s, reduce, A, Context)) ;

    }
    else
    #endif
    {

        //----------------------------------------------------------------------
        // use OpenMP on the CPU threads
        //----------------------------------------------------------------------

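        // Split the reduction into multiple tasks per thread (about 64 per
        // thread when running in parallel), so that unevenly distributed
        // entries can be balanced across the threads; never use more tasks
        // than entries, and always use at least one task.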
        int nthreads = 0, ntasks = 0 ;
        GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
        nthreads = GB_nthreads (anz, chunk, nthreads_max) ;
        ntasks = (nthreads == 1) ? 1 : (64 * nthreads) ;
        ntasks = GB_IMIN (ntasks, anz) ;
        ntasks = GB_IMAX (ntasks, 1) ;

        //----------------------------------------------------------------------
        // allocate workspace
        //----------------------------------------------------------------------

        GB_WERK_PUSH (W, ntasks * zsize, GB_void) ;
        GB_WERK_PUSH (F, ntasks, bool) ;
        if (W == NULL || F == NULL)
        {
            // out of memory
            GB_FREE_ALL ;
            return (GrB_OUT_OF_MEMORY) ;
        }
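
        // W holds one partial result of size zsize for each task, and F [tid]
        // records whether task tid reduced any live (non-zombie) entries into
        // its partial result; the partial results are then combined into s.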

        //----------------------------------------------------------------------
        // s = reduce_to_scalar (A)
        //----------------------------------------------------------------------

        // get terminal value, if any
        GB_void *restrict terminal = (GB_void *) reduce->terminal ;
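        // A terminal monoid can stop early once s reaches its terminal value:
        // for example, the LOR monoid can stop as soon as s becomes true, and
        // the MIN monoid on int32 can stop if s reaches INT32_MIN.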

        if (anz == 0)
        {

            //------------------------------------------------------------------
            // nothing to do
            //------------------------------------------------------------------

            ;

        }
        else if (A->type == ztype)
        {

            //------------------------------------------------------------------
            // reduce to scalar via built-in operator
            //------------------------------------------------------------------

            bool done = false ;

            #ifndef GBCOMPACT

                //--------------------------------------------------------------
                // define the worker for the switch factory
                //--------------------------------------------------------------

                #define GB_red(opname,aname) \
                    GB (_red_scalar_ ## opname ## aname)

                #define GB_RED_WORKER(opname,aname,atype)                   \
                {                                                           \
                    info = GB_red (opname, aname) ((atype *) s, A, W, F,    \
                        ntasks, nthreads) ;                                 \
                    done = (info != GrB_NO_VALUE) ;                         \
                }                                                           \
                break ;

                //--------------------------------------------------------------
                // launch the switch factory
                //--------------------------------------------------------------

                // controlled by opcode and typecode
                GB_Opcode opcode = reduce->op->opcode ;
                GB_Type_code typecode = A->type->code ;
                ASSERT (typecode <= GB_UDT_code) ;

                #include "GB_red_factory.c"

            #endif
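
            // For example, reducing a GrB_FP64 matrix with the PLUS monoid
            // dispatches above to a type- and operator-specific kernel (of
            // the form GB_red_scalar__plus_fp64, declared in
            // GB_red__include.h); the generic worker below is used only when
            // no such kernel applies.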

            //------------------------------------------------------------------
            // generic worker: sum up the entries, no typecasting
            //------------------------------------------------------------------

            if (!done)
            {
                GB_BURBLE_MATRIX (A, "(generic reduce to scalar: %s) ",
                    reduce->op->name) ;

                // the switch factory didn't handle this case
                GxB_binary_function freduce = reduce->op->function ;

                #define GB_ATYPE GB_void

                // no panel used
                #define GB_PANEL 1
                #define GB_NO_PANEL_CASE

                // ztype t = identity
                #define GB_SCALAR_IDENTITY(t)                           \
                    GB_void t [GB_VLA(zsize)] ;                         \
                    memcpy (t, reduce->identity, zsize) ;

                // t = W [tid], no typecast
                #define GB_COPY_ARRAY_TO_SCALAR(t, W, tid)              \
                    memcpy (t, W +(tid*zsize), zsize)

                // W [tid] = t, no typecast
                #define GB_COPY_SCALAR_TO_ARRAY(W, tid, t)              \
                    memcpy (W +(tid*zsize), t, zsize)

                // s += W [k], no typecast
                #define GB_ADD_ARRAY_TO_SCALAR(s,W,k)                   \
                    freduce (s, s, W +((k)*zsize))

                // break if terminal value reached
                #define GB_HAS_TERMINAL 1
                #define GB_IS_TERMINAL(s) \
                    (terminal != NULL && memcmp (s, terminal, zsize) == 0)

                // t = (ztype) Ax [p], but no typecasting needed
                #define GB_CAST_ARRAY_TO_SCALAR(t,Ax,p)                 \
                    memcpy (t, Ax +((p)*zsize), zsize)

                // t += (ztype) Ax [p], but no typecasting needed
                #define GB_ADD_CAST_ARRAY_TO_SCALAR(t,Ax,p)             \
                    freduce (t, t, Ax +((p)*zsize))

                #include "GB_reduce_to_scalar_template.c"
            }

        }
        else
        {

            //------------------------------------------------------------------
            // generic worker: sum up the entries, with typecasting
            //------------------------------------------------------------------

            GB_BURBLE_MATRIX (A, "(generic reduce to scalar, with typecast:"
                " %s) ", reduce->op->name) ;

            GxB_binary_function freduce = reduce->op->function ;
            GB_cast_function
                cast_A_to_Z = GB_cast_factory (ztype->code, A->type->code) ;

            // t = (ztype) Ax [p], with typecast
            #undef  GB_CAST_ARRAY_TO_SCALAR
            #define GB_CAST_ARRAY_TO_SCALAR(t,Ax,p)                 \
                cast_A_to_Z (t, Ax +((p)*asize), asize)

            // t += (ztype) Ax [p], with typecast
            #undef  GB_ADD_CAST_ARRAY_TO_SCALAR
            #define GB_ADD_CAST_ARRAY_TO_SCALAR(t,Ax,p)             \
                GB_void awork [GB_VLA(zsize)] ;                     \
                cast_A_to_Z (awork, Ax +((p)*asize), asize) ;       \
                freduce (t, t, awork)

            #include "GB_reduce_to_scalar_template.c"
        }
    }

    //--------------------------------------------------------------------------
    // c = s or c = accum (c,s)
    //--------------------------------------------------------------------------

    // This operation does not use GB_accum_mask, since c and s are
    // scalars, not matrices.  There is no scalar mask.
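
    // For example, if accum is GrB_PLUS_FP64, the result is c = c + s, with
    // c and s typecast to double as needed.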

    if (accum == NULL)
    {
        // c = (ctype) s
        GB_cast_function
            cast_Z_to_C = GB_cast_factory (ctype->code, ztype->code) ;
        cast_Z_to_C (c, s, ctype->size) ;
    }
    else
    {
        GxB_binary_function faccum = accum->function ;

        GB_cast_function cast_C_to_xaccum, cast_Z_to_yaccum, cast_zaccum_to_C ;
        cast_C_to_xaccum = GB_cast_factory (accum->xtype->code, ctype->code) ;
        cast_Z_to_yaccum = GB_cast_factory (accum->ytype->code, ztype->code) ;
        cast_zaccum_to_C = GB_cast_factory (ctype->code, accum->ztype->code) ;

        // scalar workspace
        GB_void xaccum [GB_VLA(accum->xtype->size)] ;
        GB_void yaccum [GB_VLA(accum->ytype->size)] ;
        GB_void zaccum [GB_VLA(accum->ztype->size)] ;

        // xaccum = (accum->xtype) c
        cast_C_to_xaccum (xaccum, c, ctype->size) ;

        // yaccum = (accum->ytype) s
        cast_Z_to_yaccum (yaccum, s, zsize) ;

        // zaccum = xaccum "+" yaccum
        faccum (zaccum, xaccum, yaccum) ;

        // c = (ctype) zaccum
        cast_zaccum_to_C (c, zaccum, ctype->size) ;
    }

    //--------------------------------------------------------------------------
    // free workspace and return result
    //--------------------------------------------------------------------------

    GB_FREE_ALL ;
    return (GrB_SUCCESS) ;
}
