1 //------------------------------------------------------------------------------
2 // GB_selector: select entries from a matrix
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // GB_selector does the work for GB_select and the GxB_*select methods.
11 // It also deletes zombies for GB_Matrix_wait using the NONZOMBIE operator,
12 // and deletes entries outside a smaller matrix for GxB_*resize.
13
14 // TODO: GB_selector does not exploit the mask.
15
16 // If C is NULL on input, A is modified in-place.
17 // Otherwise, C is an uninitialized static header.
18
19 #include "GB_select.h"
20 #include "GB_ek_slice.h"
21 #include "GB_sel__include.h"
22
23 #define GB_FREE_WORK \
24 { \
25 GB_FREE_WERK (&Zp, Zp_size) ; \
26 GB_WERK_POP (Work, int64_t) ; \
27 GB_WERK_POP (A_ek_slicing, int64_t) ; \
28 GB_FREE (&Cp, Cp_size) ; \
29 GB_FREE (&Ch, Ch_size) ; \
30 GB_FREE (&Ci, Ci_size) ; \
31 GB_FREE (&Cx, Cx_size) ; \
32 }
33
34 #define GB_FREE_ALL \
35 { \
36 GB_phbix_free (C) ; \
37 GB_FREE_WORK ; \
38 }
39
GB_selector(GrB_Matrix C,GB_Select_Opcode opcode,const GxB_SelectOp op,const bool flipij,GrB_Matrix A,int64_t ithunk,const GxB_Scalar Thunk,GB_Context Context)40 GrB_Info GB_selector
41 (
42 GrB_Matrix C, // output matrix, NULL or static header
43 GB_Select_Opcode opcode, // selector opcode
44 const GxB_SelectOp op, // user operator
45 const bool flipij, // if true, flip i and j for user operator
46 GrB_Matrix A, // input matrix
47 int64_t ithunk, // (int64_t) Thunk, if Thunk is NULL
48 const GxB_Scalar Thunk, // optional input for select operator
49 GB_Context Context
50 )
51 {
52
53 //--------------------------------------------------------------------------
54 // check inputs
55 //--------------------------------------------------------------------------
56
57 ASSERT_SELECTOP_OK_OR_NULL (op, "selectop for GB_selector", GB0) ;
58 ASSERT_SCALAR_OK_OR_NULL (Thunk, "Thunk for GB_selector", GB0) ;
59 ASSERT (opcode >= 0 && opcode <= GB_USER_SELECT_opcode) ;
60
61 ASSERT_MATRIX_OK (A, "A input for GB_selector", GB_FLIP (GB0)) ;
62 // positional selector (tril, triu, diag, offdiag, resize): can't be jumbled
63 ASSERT (GB_IMPLIES (opcode <= GB_RESIZE_opcode, !GB_JUMBLED (A))) ;
64 // entry selector: jumbled OK
65 ASSERT (GB_IMPLIES (opcode > GB_RESIZE_opcode, GB_JUMBLED_OK (A))) ;
66
67 GrB_Info info ;
68 bool in_place_A = (C == NULL) ; // GrB_Matrix_wait and GB_resize only
69 ASSERT (C == NULL || (C != NULL && C->static_header)) ;
70
71 //--------------------------------------------------------------------------
72 // declare workspace
73 //--------------------------------------------------------------------------
74
75 int64_t *restrict Zp = NULL ; size_t Zp_size = 0 ;
76 GB_WERK_DECLARE (Work, int64_t) ;
77 int64_t *restrict Wfirst = NULL ;
78 int64_t *restrict Wlast = NULL ;
79 int64_t *restrict Cp_kfirst = NULL ;
80 GB_WERK_DECLARE (A_ek_slicing, int64_t) ;
81
82 //--------------------------------------------------------------------------
83 // get Thunk
84 //--------------------------------------------------------------------------
85
86 // The scalar value of Thunk(0) is typecasted to an integer (int64_t
87 // ithunk) for built-in operators (tril, triu, diag, offdiag, and resize).
88 // It is also typecast to the same type as A (to the scalar athunk). This
89 // is used for gt, ge, lt, le, ne, eq to Thunk, for built-in types.
90
91 // If Thunk is NULL, or has no entry, it is treated as a scalar value
92 // of zero.
93
94 const int64_t asize = A->type->size ;
95 const GB_Type_code typecode = A->type->code ;
96
97 GB_void athunk [GB_VLA(asize)] ;
98 memset (athunk, 0, asize) ;
99 GB_void *restrict xthunk = athunk ;
100
101 if (Thunk != NULL && GB_NNZ (Thunk) > 0)
102 {
103 // xthunk points to Thunk->x for user-defined select operators
104 xthunk = (GB_void *) Thunk->x ;
105 GB_Type_code tcode = Thunk->type->code ;
106 ithunk = 0 ;
107 if (tcode <= GB_FP64_code && opcode < GB_USER_SELECT_opcode)
108 {
109 // ithunk = (int64_t) Thunk (0)
110 size_t tsize = Thunk->type->size ;
111 GB_cast_array ((GB_void *restrict) &ithunk, GB_INT64_code,
112 xthunk, tcode, NULL, tsize, 1, 1) ;
113 // athunk = (atype) Thunk (0)
114 GB_cast_array (athunk, typecode, xthunk, tcode, NULL, tsize, 1, 1) ;
115 // xthunk now points to the typecasted (atype) Thunk (0)
116 xthunk = athunk ;
117 }
118 }
119
120 //--------------------------------------------------------------------------
121 // get the user-defined operator
122 //--------------------------------------------------------------------------
123
124 GxB_select_function user_select = NULL ;
125 if (op != NULL && opcode >= GB_USER_SELECT_opcode)
126 {
127 GB_BURBLE_MATRIX (A, "(generic select: %s) ", op->name) ;
128 user_select = (GxB_select_function) (op->function) ;
129 }
130
131 //--------------------------------------------------------------------------
132 // handle the packed case (bitmap, full, or all entries present)
133 //--------------------------------------------------------------------------
134
135 bool use_bitmap_selector ;
136 if (opcode == GB_RESIZE_opcode || opcode == GB_NONZOMBIE_opcode)
137 {
138 // GB_bitmap_selector does not support these opcodes. For the RESIZE
139 // and NONZOMBIE operators, A will never be bitmap. A is converted to
140 // hypersparse first for RESIZE, and a full/bitmap matrix never has
141 // zombies.
142 use_bitmap_selector = false ;
143 }
144 else if (opcode == GB_DIAG_opcode)
145 {
146 // GB_bitmap_selector supports the DIAG operator, but it is currently
147 // not efficient (GB_bitmap_selector should return a sparse diagonal
148 // matrix, not bitmap). So use the sparse case if A is not bitmap,
149 // since the sparse case below does not support the bitmap case.
150 use_bitmap_selector = GB_IS_BITMAP (A) ;
151 }
152 else
153 {
154 // For bitmap, full, or packed matrices (sparse/hypersparse with all
155 // entries present, not jumbled, no zombies, and no pending tuples),
156 // use the bitmap selector for all other operators (TRIL, TRIU,
157 // OFFDIAG, NONZERO, EQ*, GT*, GE*, LT*, LE*, and user-defined
158 // operators).
159 use_bitmap_selector = GB_is_packed (A) ;
160 }
161
162 //--------------------------------------------------------------------------
163 // bitmap/full case
164 //--------------------------------------------------------------------------
165
166 if (use_bitmap_selector)
167 {
168 // this case is only used by GB_select
169 GB_BURBLE_MATRIX (A, "(bitmap select: %s) ", op->name) ;
170 ASSERT (C != NULL && C->static_header) ;
171 return (GB_bitmap_selector (C, opcode, user_select, flipij, A,
172 ithunk, xthunk, Context)) ;
173 }
174
175 //--------------------------------------------------------------------------
176 // get A: sparse, hypersparse, or full
177 //--------------------------------------------------------------------------
178
179 // the case when A is bitmap is always handled above by GB_bitmap_selector
180 ASSERT (!GB_IS_BITMAP (A)) ;
181
182 int64_t *restrict Ap = A->p ; size_t Ap_size = A->p_size ;
183 int64_t *restrict Ah = A->h ;
184 int64_t *restrict Ai = A->i ; size_t Ai_size = A->i_size ;
185 GB_void *restrict Ax = (GB_void *) A->x ; size_t Ax_size = A->x_size ;
186 int64_t avlen = A->vlen ;
187 int64_t avdim = A->vdim ;
188 int64_t anvec = A->nvec ;
189 bool A_jumbled = A->jumbled ;
190
191 //--------------------------------------------------------------------------
192 // allocate the new vector pointers of C
193 //--------------------------------------------------------------------------
194
195 int64_t *restrict Cp = NULL ; size_t Cp_size = 0 ;
196 int64_t *restrict Ch = NULL ; size_t Ch_size = 0 ;
197 int64_t *restrict Ci = NULL ; size_t Ci_size = 0 ;
198 GB_void *restrict Cx = NULL ; size_t Cx_size = 0 ;
199 int64_t cnz = 0 ;
200
201 Cp = GB_CALLOC (anvec+1, int64_t, &Cp_size) ;
202 if (Cp == NULL)
203 {
204 // out of memory
205 return (GrB_OUT_OF_MEMORY) ;
206 }
207
208 //--------------------------------------------------------------------------
209 // determine the number of threads and tasks to use
210 //--------------------------------------------------------------------------
211
212 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
213
214 //--------------------------------------------------------------------------
215 // slice the entries for each task
216 //--------------------------------------------------------------------------
217
218 int A_ntasks, A_nthreads ;
219 double work = 8*anvec + ((opcode == GB_DIAG_opcode) ? 0 : GB_NNZ_HELD (A)) ;
220 GB_SLICE_MATRIX_WORK (A, 8, chunk, work) ;
221
222 //--------------------------------------------------------------------------
223 // allocate workspace for each task
224 //--------------------------------------------------------------------------
225
226 GB_WERK_PUSH (Work, 3*A_ntasks, int64_t) ;
227 if (Work == NULL)
228 {
229 // out of memory
230 GB_FREE_ALL ;
231 return (GrB_OUT_OF_MEMORY) ;
232 }
233 Wfirst = Work ;
234 Wlast = Work + A_ntasks ;
235 Cp_kfirst = Work + A_ntasks * 2 ;
236
237 //--------------------------------------------------------------------------
238 // count the live entries in each vector
239 //--------------------------------------------------------------------------
240
241 // Count the number of live entries in each vector of A. The result is
242 // computed in Cp, where Cp [k] is the number of live entries in the kth
243 // vector of A.
244
245 if (opcode <= GB_RESIZE_opcode)
246 {
247 // allocate Zp
248 Zp = GB_MALLOC_WERK (anvec, int64_t, &Zp_size) ;
249 if (Zp == NULL)
250 {
251 // out of memory
252 GB_FREE_ALL ;
253 return (GrB_OUT_OF_MEMORY) ;
254 }
255 }
256
257 //--------------------------------------------------------------------------
258 // phase1: count the entries
259 //--------------------------------------------------------------------------
260
261 // define the worker for the switch factory
262 #define GB_SELECT_PHASE1
263 #define GB_sel1(opname,aname) GB (_sel_phase1_ ## opname ## aname)
264 #define GB_SEL_WORKER(opname,aname,atype) \
265 { \
266 GB_sel1 (opname, aname) (Zp, Cp, Wfirst, Wlast, \
267 A, flipij, ithunk, \
268 (atype *) xthunk, user_select, \
269 A_ek_slicing, A_ntasks, A_nthreads) ; \
270 } \
271 break ;
272
273 // launch the switch factory
274 #include "GB_select_factory.c"
275
276 #undef GB_SELECT_PHASE1
277 #undef GB_SEL_WORKER
278
279 //--------------------------------------------------------------------------
280 // cumulative sum of Cp and compute Cp_kfirst
281 //--------------------------------------------------------------------------
282
283 int64_t C_nvec_nonempty ;
284 GB_ek_slice_merge2 (&C_nvec_nonempty, Cp_kfirst, Cp, anvec,
285 Wfirst, Wlast, A_ek_slicing, A_ntasks, A_nthreads, Context) ;
286
287 //--------------------------------------------------------------------------
288 // allocate new space for the compacted Ci and Cx
289 //--------------------------------------------------------------------------
290
291 cnz = Cp [anvec] ;
292 cnz = GB_IMAX (cnz, 1) ;
293 Ci = GB_MALLOC (cnz, int64_t, &Ci_size) ;
294 Cx = GB_MALLOC (cnz * asize, GB_void, &Cx_size) ;
295 if (Ci == NULL || Cx == NULL)
296 {
297 // out of memory
298 GB_FREE_ALL ;
299 return (GrB_OUT_OF_MEMORY) ;
300 }
301
302 if (opcode == GB_EQ_ZERO_opcode)
303 {
304 // Set Cx [0..cnz-1] to all zero, so that phase2 only needs to
305 // construct the pattern in Ci.
306 GB_memset (Cx, 0, cnz * asize, nthreads_max) ;
307 }
308
309 //--------------------------------------------------------------------------
310 // phase2: select the entries
311 //--------------------------------------------------------------------------
312
313 // define the worker for the switch factory
314 #define GB_SELECT_PHASE2
315 #define GB_sel2(opname,aname) GB (_sel_phase2_ ## opname ## aname)
316 #define GB_SEL_WORKER(opname,aname,atype) \
317 { \
318 GB_sel2 (opname, aname) (Ci, (atype *) Cx, \
319 Zp, Cp, Cp_kfirst, \
320 A, flipij, ithunk, \
321 (atype *) xthunk, user_select, \
322 A_ek_slicing, A_ntasks, A_nthreads) ; \
323 } \
324 break ;
325
326 // launch the switch factory
327 #include "GB_select_factory.c"
328
329 //--------------------------------------------------------------------------
330 // create the result
331 //--------------------------------------------------------------------------
332
333 if (in_place_A)
334 {
335
336 //----------------------------------------------------------------------
337 // transplant Cp, Ci, Cx back into A
338 //----------------------------------------------------------------------
339
340 // TODO: this is not parallel: use GB_hyper_prune
341 if (A->h != NULL && C_nvec_nonempty < anvec)
342 {
343 // prune empty vectors from Ah and Ap
344 int64_t cnvec = 0 ;
345 for (int64_t k = 0 ; k < anvec ; k++)
346 {
347 if (Cp [k] < Cp [k+1])
348 {
349 Ah [cnvec] = Ah [k] ;
350 Ap [cnvec] = Cp [k] ;
351 cnvec++ ;
352 }
353 }
354 Ap [cnvec] = Cp [anvec] ;
355 A->nvec = cnvec ;
356 ASSERT (A->nvec == C_nvec_nonempty) ;
357 GB_FREE (&Cp, Cp_size) ;
358 }
359 else
360 {
361 // free the old A->p and transplant in Cp as the new A->p
362 GB_FREE (&Ap, Ap_size) ;
363 A->p = Cp ; Cp = NULL ; A->p_size = Cp_size ;
364 }
365
366 ASSERT (Cp == NULL) ;
367
368 GB_FREE (&Ai, Ai_size) ;
369 GB_FREE (&Ax, Ax_size) ;
370 A->i = Ci ; Ci = NULL ; A->i_size = Ci_size ;
371 A->x = Cx ; Cx = NULL ; A->x_size = Cx_size ;
372 A->nzmax = cnz ;
373 A->nvec_nonempty = C_nvec_nonempty ;
374 A->jumbled = A_jumbled ; // A remains jumbled (in-place select)
375
376 // the NONZOMBIE opcode may have removed all zombies, but A->nzombie
377 // is still nonzero. It set to zero in GB_Matrix_wait.
378 ASSERT_MATRIX_OK (A, "A output for GB_selector", GB_FLIP (GB0)) ;
379
380 // positional selector (tril, triu, diag, offdiag, resize): not jumbled
381 ASSERT (GB_IMPLIES (opcode <= GB_RESIZE_opcode, !GB_JUMBLED (A))) ;
382 // entry selector: C can be returned as jumbled
383 ASSERT (GB_IMPLIES (opcode > GB_RESIZE_opcode, GB_JUMBLED_OK (A))) ;
384
385 }
386 else
387 {
388
389 //----------------------------------------------------------------------
390 // create C and transplant Cp, Ch, Ci, Cx into C
391 //----------------------------------------------------------------------
392
393 int sparsity = (A->h != NULL) ? GxB_HYPERSPARSE : GxB_SPARSE ;
394 ASSERT (C != NULL && C->static_header) ;
395 info = GB_new (&C, true, // sparse or hyper (from A), static header
396 A->type, avlen, avdim, GB_Ap_null, true,
397 sparsity, A->hyper_switch, anvec, Context) ;
398 ASSERT (info == GrB_SUCCESS) ;
399
400 if (A->h != NULL)
401 {
402
403 //------------------------------------------------------------------
404 // A and C are hypersparse: copy non-empty vectors from Ah to Ch
405 //------------------------------------------------------------------
406
407 Ch = GB_MALLOC (anvec, int64_t, &Ch_size) ;
408 if (Ch == NULL)
409 {
410 // out of memory
411 GB_FREE_ALL ;
412 return (GrB_OUT_OF_MEMORY) ;
413 }
414
415 // TODO: do in parallel: use GB_hyper_prune
416 int64_t cnvec = 0 ;
417 for (int64_t k = 0 ; k < anvec ; k++)
418 {
419 if (Cp [k] < Cp [k+1])
420 {
421 Ch [cnvec] = Ah [k] ;
422 Cp [cnvec] = Cp [k] ;
423 cnvec++ ;
424 }
425 }
426 Cp [cnvec] = Cp [anvec] ;
427 C->nvec = cnvec ;
428 ASSERT (C->nvec == C_nvec_nonempty) ;
429 }
430
431 C->p = Cp ; Cp = NULL ; C->p_size = Cp_size ;
432 C->h = Ch ; Ch = NULL ; C->h_size = Ch_size ;
433 C->i = Ci ; Ci = NULL ; C->i_size = Ci_size ;
434 C->x = Cx ; Cx = NULL ; C->x_size = Cx_size ;
435 C->nzmax = cnz ;
436 C->magic = GB_MAGIC ;
437 C->nvec_nonempty = C_nvec_nonempty ;
438 C->jumbled = A_jumbled ; // C is jumbled if A is jumbled
439
440 ASSERT_MATRIX_OK (C, "C output for GB_selector", GB0) ;
441
442 // positional selector (tril, triu, diag, offdiag, resize): not jumbled
443 ASSERT (GB_IMPLIES (opcode <= GB_RESIZE_opcode, !GB_JUMBLED (C))) ;
444 // entry selector: C can be returned as jumbled
445 ASSERT (GB_IMPLIES (opcode > GB_RESIZE_opcode, GB_JUMBLED_OK (C))) ;
446 }
447
448 //--------------------------------------------------------------------------
449 // free workspace and return result
450 //--------------------------------------------------------------------------
451
452 GB_FREE_WORK ;
453 return (GrB_SUCCESS) ;
454 }
455
456