1 //------------------------------------------------------------------------------
2 // GB_reduce_to_scalar: reduce a matrix to a scalar
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // c = accum (c, reduce_to_scalar(A)), reduce entries in a matrix to a scalar.
11 // Does the work for GrB_*_reduce_TYPE, both matrix and vector.
12
13 // This function does not need to know if A is hypersparse or not, and its
14 // result is the same if A is in CSR or CSC format.
15
16 // This function is the only place in all of GraphBLAS where the identity value
17 // of a monoid is required, but only in one special case: it is required to be
18 // the return value of c when A has no entries. The identity value is also
19 // used internally, in the parallel methods below, to initialize a scalar value
20 // in each task. The methods could be rewritten to avoid the use of the
21 // identity value. Since this function requires it anyway, for the special
22 // case when nvals(A) is zero, the existence of the identity value makes the
23 // code a little simpler.
24
25 #include "GB_reduce.h"
26 #include "GB_binop.h"
27 #include "GB_atomics.h"
28 #ifndef GBCOMPACT
29 #include "GB_red__include.h"
30 #endif
31
// GB_FREE_ALL: free all workspace allocated by GB_reduce_to_scalar (the
// per-task W and F arrays pushed onto the Werk stack).  Safe to invoke even
// if the GB_WERK_PUSH calls were never reached or failed.
#define GB_FREE_ALL                 \
{                                   \
    GB_WERK_POP (F, bool) ;         \
    GB_WERK_POP (W, GB_void) ;      \
}
37
// GB_reduce_to_scalar: compute c = accum (c, s) where s = reduce(A), the
// "sum" of all entries in A via the monoid reduce.  If accum is NULL, then
// c = (ctype) s.  If A has no entries, s is the monoid identity value.
// Typecasting is performed as needed between A->type, the monoid's ztype,
// and ctype.  Returns GrB_SUCCESS on success, GrB_OUT_OF_MEMORY if workspace
// cannot be allocated, GrB_DOMAIN_MISMATCH if A->type cannot be typecast to
// the monoid ztype, or other error codes from the input checks below.
GrB_Info GB_reduce_to_scalar    // s = reduce_to_scalar (A)
(
    void *c,                    // result scalar (output; must be non-NULL)
    const GrB_Type ctype,       // the type of scalar, c
    const GrB_BinaryOp accum,   // for c = accum(c,s); may be NULL
    const GrB_Monoid reduce,    // monoid to do the reduction
    const GrB_Matrix A,         // matrix to reduce
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    GrB_Info info ;
    GB_RETURN_IF_NULL_OR_FAULTY (reduce) ;
    GB_RETURN_IF_FAULTY_OR_POSITIONAL (accum) ;
    GB_RETURN_IF_NULL (c) ;
    // W: one ztype scalar per task; F: one bool per task.  Both are consumed
    // by the reduction workers / template below.  Declared here so that
    // GB_FREE_ALL can pop them on any error path.
    GB_WERK_DECLARE (W, GB_void) ;
    GB_WERK_DECLARE (F, bool) ;

    ASSERT_TYPE_OK (ctype, "type of scalar c", GB0) ;
    ASSERT_MONOID_OK (reduce, "reduce for reduce_to_scalar", GB0) ;
    ASSERT_BINARYOP_OK_OR_NULL (accum, "accum for reduce_to_scalar", GB0) ;
    ASSERT_MATRIX_OK (A, "A for reduce_to_scalar", GB0) ;

    // check domains and dimensions for c = accum (c,s)
    GrB_Type ztype = reduce->op->ztype ;
    GB_OK (GB_compatible (ctype, NULL, NULL, false, accum, ztype, Context)) ;

    // s = reduce (s,A) must be compatible: A->type must be typecastable to
    // the monoid's ztype
    if (!GB_Type_compatible (A->type, ztype))
    {
        return (GrB_DOMAIN_MISMATCH) ;
    }

    //--------------------------------------------------------------------------
    // assemble any pending tuples; zombies are OK
    //--------------------------------------------------------------------------

    GB_MATRIX_WAIT_IF_PENDING (A) ;
    GB_BURBLE_DENSE (A, "(A %s) ") ;

    ASSERT (GB_ZOMBIES_OK (A)) ;
    ASSERT (GB_JUMBLED_OK (A)) ;
    ASSERT (!GB_PENDING (A)) ;

    //--------------------------------------------------------------------------
    // get A
    //--------------------------------------------------------------------------

    int64_t asize = A->type->size ;
    int64_t zsize = ztype->size ;
    // entries held in A's data structure; zombies are OK here (see asserts
    // above), so this may differ from nvals(A)
    int64_t anz = GB_NNZ_HELD (A) ;

    // s = identity
    GB_void s [GB_VLA(zsize)] ;
    memcpy (s, reduce->identity, zsize) ;   // required, if nnz(A) is zero

    //--------------------------------------------------------------------------
    // s = reduce_to_scalar (A) on the GPU(s) or CPU
    //--------------------------------------------------------------------------

    #if defined ( GBCUDA )
    if (GB_reduce_to_scalar_cuda_branch (reduce, A, Context))
    {

        //----------------------------------------------------------------------
        // use the GPU(s)
        //----------------------------------------------------------------------

        GB_OK (GB_reduce_to_scalar_cuda (s, reduce, A, Context)) ;

    }
    else
    #endif
    {

        //----------------------------------------------------------------------
        // use OpenMP on the CPU threads
        //----------------------------------------------------------------------

        // ntasks is oversubscribed (64 tasks per thread) for load balance,
        // but never exceeds anz and is at least 1
        int nthreads = 0, ntasks = 0 ;
        GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
        nthreads = GB_nthreads (anz, chunk, nthreads_max) ;
        ntasks = (nthreads == 1) ? 1 : (64 * nthreads) ;
        ntasks = GB_IMIN (ntasks, anz) ;
        ntasks = GB_IMAX (ntasks, 1) ;

        //----------------------------------------------------------------------
        // allocate workspace
        //----------------------------------------------------------------------

        // W [tid] holds the partial result of task tid (ztype scalars,
        // addressed as raw bytes); F [tid] is a per-task bool flag used by
        // the reduction workers (see the template)
        GB_WERK_PUSH (W, ntasks * zsize, GB_void) ;
        GB_WERK_PUSH (F, ntasks, bool) ;
        if (W == NULL || F == NULL)
        {
            // out of memory
            GB_FREE_ALL ;
            return (GrB_OUT_OF_MEMORY) ;
        }

        //----------------------------------------------------------------------
        // s = reduce_to_scalar (A)
        //----------------------------------------------------------------------

        // get terminal value, if any: a monoid value at which the reduction
        // can stop early (NULL if the monoid has no terminal value)
        GB_void *restrict terminal = (GB_void *) reduce->terminal ;

        if (anz == 0)
        {

            //------------------------------------------------------------------
            // nothing to do
            //------------------------------------------------------------------

            // s remains the monoid identity (copied above)
            ;

        }
        else if (A->type == ztype)
        {

            //------------------------------------------------------------------
            // reduce to scalar via built-in operator
            //------------------------------------------------------------------

            // no typecast needed; try the compiled switch-factory workers
            // first, then fall back to the generic worker
            bool done = false ;

            #ifndef GBCOMPACT

                //--------------------------------------------------------------
                // define the worker for the switch factory
                //--------------------------------------------------------------

                #define GB_red(opname,aname) \
                    GB (_red_scalar_ ## opname ## aname)

                #define GB_RED_WORKER(opname,aname,atype)                   \
                {                                                           \
                    info = GB_red (opname, aname) ((atype *) s, A, W, F,    \
                        ntasks, nthreads) ;                                 \
                    done = (info != GrB_NO_VALUE) ;                         \
                }                                                           \
                break ;

                //--------------------------------------------------------------
                // launch the switch factory
                //--------------------------------------------------------------

                // controlled by opcode and typecode
                GB_Opcode opcode = reduce->op->opcode ;
                GB_Type_code typecode = A->type->code ;
                ASSERT (typecode <= GB_UDT_code) ;

                // expands into a switch over (opcode, typecode) that invokes
                // GB_RED_WORKER for each built-in monoid/type pair
                #include "GB_red_factory.c"

            #endif

            //------------------------------------------------------------------
            // generic worker: sum up the entries, no typecasting
            //------------------------------------------------------------------

            if (!done)
            {
                GB_BURBLE_MATRIX (A, "(generic reduce to scalar: %s) ",
                    reduce->op->name) ;

                // the switch factory didn't handle this case
                GxB_binary_function freduce = reduce->op->function ;

                // the macros below configure GB_reduce_to_scalar_template.c;
                // entries are moved as opaque byte strings of size zsize and
                // combined with freduce

                #define GB_ATYPE GB_void

                // no panel used
                #define GB_PANEL 1
                #define GB_NO_PANEL_CASE

                // ztype t = identity
                #define GB_SCALAR_IDENTITY(t)                           \
                    GB_void t [GB_VLA(zsize)] ;                         \
                    memcpy (t, reduce->identity, zsize) ;

                // t = W [tid], no typecast
                #define GB_COPY_ARRAY_TO_SCALAR(t, W, tid)              \
                    memcpy (t, W +(tid*zsize), zsize)

                // W [tid] = t, no typecast
                #define GB_COPY_SCALAR_TO_ARRAY(W, tid, t)              \
                    memcpy (W +(tid*zsize), t, zsize)

                // s += W [k], no typecast
                #define GB_ADD_ARRAY_TO_SCALAR(s,W,k)                   \
                    freduce (s, s, W +((k)*zsize))

                // break if terminal value reached (byte-wise comparison
                // against the monoid's terminal value, if it has one)
                #define GB_HAS_TERMINAL 1
                #define GB_IS_TERMINAL(s)                               \
                    (terminal != NULL && memcmp (s, terminal, zsize) == 0)

                // t = (ztype) Ax [p], but no typecasting needed
                #define GB_CAST_ARRAY_TO_SCALAR(t,Ax,p)                 \
                    memcpy (t, Ax +((p)*zsize), zsize)

                // t += (ztype) Ax [p], but no typecasting needed
                #define GB_ADD_CAST_ARRAY_TO_SCALAR(t,Ax,p)             \
                    freduce (t, t, Ax +((p)*zsize))

                #include "GB_reduce_to_scalar_template.c"
            }

        }
        else
        {

            //------------------------------------------------------------------
            // generic worker: sum up the entries, with typecasting
            //------------------------------------------------------------------

            GB_BURBLE_MATRIX (A, "(generic reduce to scalar, with typecast:"
                " %s) ", reduce->op->name) ;

            GxB_binary_function freduce = reduce->op->function ;
            GB_cast_function
                cast_A_to_Z = GB_cast_factory (ztype->code, A->type->code) ;

            // reuse the template with only the two array-to-scalar macros
            // redefined, so each entry of A is typecast from asize bytes of
            // A->type into zsize bytes of ztype before being reduced

            // t = (ztype) Ax [p], with typecast
            #undef  GB_CAST_ARRAY_TO_SCALAR
            #define GB_CAST_ARRAY_TO_SCALAR(t,Ax,p)                 \
                cast_A_to_Z (t, Ax +((p)*asize), asize)

            // t += (ztype) Ax [p], with typecast
            #undef  GB_ADD_CAST_ARRAY_TO_SCALAR
            #define GB_ADD_CAST_ARRAY_TO_SCALAR(t,Ax,p)             \
                GB_void awork [GB_VLA(zsize)] ;                     \
                cast_A_to_Z (awork, Ax +((p)*asize), asize) ;       \
                freduce (t, t, awork)

            #include "GB_reduce_to_scalar_template.c"
        }
    }

    //--------------------------------------------------------------------------
    // c = s or c = accum (c,s)
    //--------------------------------------------------------------------------

    // This operation does not use GB_accum_mask, since c and s are
    // scalars, not matrices.  There is no scalar mask.

    if (accum == NULL)
    {
        // c = (ctype) s
        GB_cast_function
            cast_Z_to_C = GB_cast_factory (ctype->code, ztype->code) ;
        cast_Z_to_C (c, s, ctype->size) ;
    }
    else
    {
        // c = accum (c, s): typecast c and s into accum's input types, apply
        // accum, then typecast the result back into ctype
        GxB_binary_function faccum = accum->function ;

        GB_cast_function cast_C_to_xaccum, cast_Z_to_yaccum, cast_zaccum_to_C ;
        cast_C_to_xaccum = GB_cast_factory (accum->xtype->code, ctype->code) ;
        cast_Z_to_yaccum = GB_cast_factory (accum->ytype->code, ztype->code) ;
        cast_zaccum_to_C = GB_cast_factory (ctype->code, accum->ztype->code) ;

        // scalar workspace
        GB_void xaccum [GB_VLA(accum->xtype->size)] ;
        GB_void yaccum [GB_VLA(accum->ytype->size)] ;
        GB_void zaccum [GB_VLA(accum->ztype->size)] ;

        // xaccum = (accum->xtype) c
        cast_C_to_xaccum (xaccum, c, ctype->size) ;

        // yaccum = (accum->ytype) s
        cast_Z_to_yaccum (yaccum, s, zsize) ;

        // zaccum = xaccum "+" yaccum
        faccum (zaccum, xaccum, yaccum) ;

        // c = (ctype) zaccum
        cast_zaccum_to_C (c, zaccum, ctype->size) ;
    }

    //--------------------------------------------------------------------------
    // free workspace and return result
    //--------------------------------------------------------------------------

    GB_FREE_ALL ;
    return (GrB_SUCCESS) ;
}
327
328