//------------------------------------------------------------------------------
// GB_selector:  select entries from a matrix
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// GB_selector does the work for GB_select and the GxB_*select methods.
// It also deletes zombies for GB_Matrix_wait using the NONZOMBIE operator,
// and deletes entries outside a smaller matrix for GxB_*resize.

// TODO: GB_selector does not exploit the mask.

// If C is NULL on input, A is modified in-place.
// Otherwise, C is an uninitialized static header.
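
// A rough sketch of the two calling modes (illustrative only; see the real
// callers, GB_select, GB_Matrix_wait, and GB_resize, for the actual setup):
//
//      // (1) build a new matrix C from A, leaving A unchanged:
//      struct GB_Matrix_opaque C_header ;
//      GrB_Matrix C = /* initialize a static header from &C_header */ ;
//      GB_selector (C, GB_TRIL_opcode, NULL, false, A, 0, NULL, Context) ;
//
//      // (2) modify A in place (C is NULL), as done when deleting zombies:
//      GB_selector (NULL, GB_NONZOMBIE_opcode, NULL, false, A, 0, NULL,
//          Context) ;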

#include "GB_select.h"
#include "GB_ek_slice.h"
#include "GB_sel__include.h"

#define GB_FREE_WORK                        \
{                                           \
    GB_FREE_WERK (&Zp, Zp_size) ;           \
    GB_WERK_POP (Work, int64_t) ;           \
    GB_WERK_POP (A_ek_slicing, int64_t) ;   \
    GB_FREE (&Cp, Cp_size) ;                \
    GB_FREE (&Ch, Ch_size) ;                \
    GB_FREE (&Ci, Ci_size) ;                \
    GB_FREE (&Cx, Cx_size) ;                \
}

#define GB_FREE_ALL                         \
{                                           \
    GB_phbix_free (C) ;                     \
    GB_FREE_WORK ;                          \
}

GrB_Info GB_selector
(
    GrB_Matrix C,               // output matrix, NULL or static header
    GB_Select_Opcode opcode,    // selector opcode
    const GxB_SelectOp op,      // user operator
    const bool flipij,          // if true, flip i and j for user operator
    GrB_Matrix A,               // input matrix
    int64_t ithunk,             // (int64_t) Thunk, if Thunk is NULL
    const GxB_Scalar Thunk,     // optional input for select operator
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT_SELECTOP_OK_OR_NULL (op, "selectop for GB_selector", GB0) ;
    ASSERT_SCALAR_OK_OR_NULL (Thunk, "Thunk for GB_selector", GB0) ;
    ASSERT (opcode >= 0 && opcode <= GB_USER_SELECT_opcode) ;

    ASSERT_MATRIX_OK (A, "A input for GB_selector", GB_FLIP (GB0)) ;
    // positional selector (tril, triu, diag, offdiag, resize): can't be jumbled
    ASSERT (GB_IMPLIES (opcode <= GB_RESIZE_opcode, !GB_JUMBLED (A))) ;
    // entry selector: jumbled OK
    ASSERT (GB_IMPLIES (opcode >  GB_RESIZE_opcode, GB_JUMBLED_OK (A))) ;

    GrB_Info info ;
    bool in_place_A = (C == NULL) ; // GrB_Matrix_wait and GB_resize only
    ASSERT (C == NULL || (C != NULL && C->static_header)) ;

    //--------------------------------------------------------------------------
    // declare workspace
    //--------------------------------------------------------------------------

    int64_t *restrict Zp = NULL ; size_t Zp_size = 0 ;
    GB_WERK_DECLARE (Work, int64_t) ;
    int64_t *restrict Wfirst = NULL ;
    int64_t *restrict Wlast = NULL ;
    int64_t *restrict Cp_kfirst = NULL ;
    GB_WERK_DECLARE (A_ek_slicing, int64_t) ;

    //--------------------------------------------------------------------------
    // get Thunk
    //--------------------------------------------------------------------------

    // The scalar value of Thunk(0) is typecast to an integer (int64_t
    // ithunk) for the built-in positional operators (tril, triu, diag,
    // offdiag, and resize).  It is also typecast to the same type as A (the
    // scalar athunk), which is used by the gt, ge, lt, le, ne, and eq
    // comparisons against Thunk, for built-in types.

    // If Thunk is NULL, or has no entry, it is treated as a scalar value
    // of zero.

    const int64_t asize = A->type->size ;
    const GB_Type_code typecode = A->type->code ;

    GB_void athunk [GB_VLA(asize)] ;
    memset (athunk, 0, asize) ;
    GB_void *restrict xthunk = athunk ;

    if (Thunk != NULL && GB_NNZ (Thunk) > 0)
    {
        // xthunk points to Thunk->x for user-defined select operators
        xthunk = (GB_void *) Thunk->x ;
        GB_Type_code tcode = Thunk->type->code ;
        ithunk = 0 ;
        if (tcode <= GB_FP64_code && opcode < GB_USER_SELECT_opcode)
        {
            // ithunk = (int64_t) Thunk (0)
            size_t tsize = Thunk->type->size ;
            GB_cast_array ((GB_void *restrict) &ithunk, GB_INT64_code,
                xthunk, tcode, NULL, tsize, 1, 1) ;
            // athunk = (atype) Thunk (0)
            GB_cast_array (athunk, typecode, xthunk, tcode, NULL, tsize, 1, 1) ;
            // xthunk now points to the typecasted (atype) Thunk (0)
            xthunk = athunk ;
        }
    }
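
    // A small worked example of the typecasting above (illustrative only):
    // if Thunk is a GrB_FP64 scalar holding the value 2, and A has type
    // GrB_INT32, then for a built-in operator ithunk becomes (int64_t) 2,
    // athunk holds (int32_t) 2, and xthunk is redirected to athunk.  For a
    // user-defined select operator, xthunk keeps pointing to Thunk->x and no
    // typecast is done.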

    //--------------------------------------------------------------------------
    // get the user-defined operator
    //--------------------------------------------------------------------------

    GxB_select_function user_select = NULL ;
    if (op != NULL && opcode >= GB_USER_SELECT_opcode)
    {
        GB_BURBLE_MATRIX (A, "(generic select: %s) ", op->name) ;
        user_select = (GxB_select_function) (op->function) ;
    }
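
    // Roughly: each candidate entry A(i,j) is passed to user_select as
    // (i, j, &aij, xthunk), and the entry is kept if it returns true (see
    // GxB_SelectOp in GraphBLAS.h for the precise signature).  If flipij is
    // true, the two indices are swapped before the call, so the user
    // function sees the row and column indices of the logical matrix even
    // when A is held by row.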

    //--------------------------------------------------------------------------
    // handle the packed case (bitmap, full, or all entries present)
    //--------------------------------------------------------------------------

    bool use_bitmap_selector ;
    if (opcode == GB_RESIZE_opcode || opcode == GB_NONZOMBIE_opcode)
    {
        // GB_bitmap_selector does not support these opcodes.  For the RESIZE
        // and NONZOMBIE operators, A will never be bitmap.  A is converted to
        // hypersparse first for RESIZE, and a full/bitmap matrix never has
        // zombies.
        use_bitmap_selector = false ;
    }
    else if (opcode == GB_DIAG_opcode)
    {
        // GB_bitmap_selector supports the DIAG operator, but it is currently
        // not efficient (GB_bitmap_selector should return a sparse diagonal
        // matrix, not bitmap).  So use the sparse case if A is not bitmap,
        // since the sparse case below does not support the bitmap case.
        use_bitmap_selector = GB_IS_BITMAP (A) ;
    }
    else
    {
        // For bitmap, full, or packed matrices (sparse/hypersparse with all
        // entries present, not jumbled, no zombies, and no pending tuples),
        // use the bitmap selector for all other operators (TRIL, TRIU,
        // OFFDIAG, NONZERO, EQ*, GT*, GE*, LT*, LE*, and user-defined
        // operators).
        use_bitmap_selector = GB_is_packed (A) ;
    }

    //--------------------------------------------------------------------------
    // bitmap/full case
    //--------------------------------------------------------------------------

    if (use_bitmap_selector)
    {
        // this case is only used by GB_select
        GB_BURBLE_MATRIX (A, "(bitmap select: %s) ", op->name) ;
        ASSERT (C != NULL && C->static_header) ;
        return (GB_bitmap_selector (C, opcode, user_select, flipij, A,
            ithunk, xthunk, Context)) ;
    }

    //--------------------------------------------------------------------------
    // get A: sparse, hypersparse, or full
    //--------------------------------------------------------------------------

    // the case when A is bitmap is always handled above by GB_bitmap_selector
    ASSERT (!GB_IS_BITMAP (A)) ;

    int64_t *restrict Ap = A->p ; size_t Ap_size = A->p_size ;
    int64_t *restrict Ah = A->h ;
    int64_t *restrict Ai = A->i ; size_t Ai_size = A->i_size ;
    GB_void *restrict Ax = (GB_void *) A->x ; size_t Ax_size = A->x_size ;
    int64_t avlen = A->vlen ;
    int64_t avdim = A->vdim ;
    int64_t anvec = A->nvec ;
    bool A_jumbled = A->jumbled ;

    //--------------------------------------------------------------------------
    // allocate the new vector pointers of C
    //--------------------------------------------------------------------------

    int64_t *restrict Cp = NULL ; size_t Cp_size = 0 ;
    int64_t *restrict Ch = NULL ; size_t Ch_size = 0 ;
    int64_t *restrict Ci = NULL ; size_t Ci_size = 0 ;
    GB_void *restrict Cx = NULL ; size_t Cx_size = 0 ;
    int64_t cnz = 0 ;

    Cp = GB_CALLOC (anvec+1, int64_t, &Cp_size) ;
    if (Cp == NULL)
    {
        // out of memory
        return (GrB_OUT_OF_MEMORY) ;
    }

    //--------------------------------------------------------------------------
    // determine the number of threads and tasks to use
    //--------------------------------------------------------------------------

    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;

    //--------------------------------------------------------------------------
    // slice the entries for each task
    //--------------------------------------------------------------------------

    int A_ntasks, A_nthreads ;
    double work = 8*anvec + ((opcode == GB_DIAG_opcode) ? 0 : GB_NNZ_HELD (A)) ;
    GB_SLICE_MATRIX_WORK (A, 8, chunk, work) ;
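
    // A_ek_slicing now describes how the entries of A are divided among
    // A_ntasks tasks (roughly 8 per thread) of nearly equal work.  Each task
    // owns a contiguous range of entries, so a single vector of A may be
    // shared by several tasks; for each task the slicing records the first
    // and last vectors it touches and where its entries begin (see
    // GB_ek_slice.h for the exact layout).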

    //--------------------------------------------------------------------------
    // allocate workspace for each task
    //--------------------------------------------------------------------------

    GB_WERK_PUSH (Work, 3*A_ntasks, int64_t) ;
    if (Work == NULL)
    {
        // out of memory
        GB_FREE_ALL ;
        return (GrB_OUT_OF_MEMORY) ;
    }
    Wfirst    = Work ;
    Wlast     = Work + A_ntasks ;
    Cp_kfirst = Work + A_ntasks * 2 ;
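
    // Wfirst [tid] and Wlast [tid] will hold the number of entries task tid
    // keeps from the first and last vectors it touches (the vectors it may
    // share with neighboring tasks), and Cp_kfirst [tid] will hold the
    // position in C where task tid starts writing into its first vector.
    // These are filled in by phase1 and by GB_ek_slice_merge2 below.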

    //--------------------------------------------------------------------------
    // count the live entries in each vector
    //--------------------------------------------------------------------------

    // Count the number of live entries in each vector of A.  The result is
    // computed in Cp, where Cp [k] is the number of live entries in the kth
    // vector of A.

    if (opcode <= GB_RESIZE_opcode)
    {
        // allocate Zp
        Zp = GB_MALLOC_WERK (anvec, int64_t, &Zp_size) ;
        if (Zp == NULL)
        {
            // out of memory
            GB_FREE_ALL ;
            return (GrB_OUT_OF_MEMORY) ;
        }
    }
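
    // Zp is extra workspace for the positional selectors (tril, triu, diag,
    // offdiag, and resize): for each vector k, Zp [k] records where the cut
    // falls inside that vector (found by binary search on the row indices),
    // so phase2 can copy a contiguous range instead of testing every entry.
    // (This is a summary; the per-operator details are in the select
    // templates.)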

    //--------------------------------------------------------------------------
    // phase1: count the entries
    //--------------------------------------------------------------------------

    // define the worker for the switch factory
    #define GB_SELECT_PHASE1
    #define GB_sel1(opname,aname) GB (_sel_phase1_ ## opname ## aname)
    #define GB_SEL_WORKER(opname,aname,atype)               \
    {                                                       \
        GB_sel1 (opname, aname) (Zp, Cp, Wfirst, Wlast,     \
            A, flipij, ithunk,                              \
            (atype *) xthunk, user_select,                  \
            A_ek_slicing, A_ntasks, A_nthreads) ;           \
    }                                                       \
    break ;

    // launch the switch factory
    #include "GB_select_factory.c"

    #undef  GB_SELECT_PHASE1
    #undef  GB_SEL_WORKER
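
    // GB_select_factory.c is a switch on the opcode and the type of A; each
    // case expands GB_SEL_WORKER into a call to a worker specialized for
    // that operator/type pair.  Phase1 fills Cp [k] with the count of
    // selected entries in the kth vector of A, and Wfirst/Wlast with the
    // per-task counts for the vectors shared between tasks.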

    //--------------------------------------------------------------------------
    // cumulative sum of Cp and compute Cp_kfirst
    //--------------------------------------------------------------------------

    int64_t C_nvec_nonempty ;
    GB_ek_slice_merge2 (&C_nvec_nonempty, Cp_kfirst, Cp, anvec,
        Wfirst, Wlast, A_ek_slicing, A_ntasks, A_nthreads, Context) ;
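
    // Cp now holds the cumulative sum of the per-vector counts, so entries
    // Cp [k] to Cp [k+1]-1 of Ci/Cx will be the kth vector of C, and
    // Cp [anvec] is the total number of selected entries.  Cp_kfirst [tid]
    // gives each task its starting position in Ci/Cx for the first vector it
    // shares with a prior task, so phase2 can write without synchronization.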

    //--------------------------------------------------------------------------
    // allocate new space for the compacted Ci and Cx
    //--------------------------------------------------------------------------

    cnz = Cp [anvec] ;
    cnz = GB_IMAX (cnz, 1) ;
    Ci = GB_MALLOC (cnz, int64_t, &Ci_size) ;
    Cx = GB_MALLOC (cnz * asize, GB_void, &Cx_size) ;
    if (Ci == NULL || Cx == NULL)
    {
        // out of memory
        GB_FREE_ALL ;
        return (GrB_OUT_OF_MEMORY) ;
    }

    if (opcode == GB_EQ_ZERO_opcode)
    {
        // Set Cx [0..cnz-1] to all zero, so that phase2 only needs to
        // construct the pattern in Ci.
        GB_memset (Cx, 0, cnz * asize, nthreads_max) ;
    }

    //--------------------------------------------------------------------------
    // phase2: select the entries
    //--------------------------------------------------------------------------

    // define the worker for the switch factory
    #define GB_SELECT_PHASE2
    #define GB_sel2(opname,aname) GB (_sel_phase2_ ## opname ## aname)
    #define GB_SEL_WORKER(opname,aname,atype)           \
    {                                                   \
        GB_sel2 (opname, aname) (Ci, (atype *) Cx,      \
            Zp, Cp, Cp_kfirst,                          \
            A, flipij, ithunk,                          \
            (atype *) xthunk, user_select,              \
            A_ek_slicing, A_ntasks, A_nthreads) ;       \
    }                                                   \
    break ;

    // launch the switch factory
    #include "GB_select_factory.c"
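
    // Phase2 revisits the same slice of A as phase1, copying each selected
    // entry (its index into Ci and, except for EQ_ZERO, its value into Cx)
    // at the position determined by Cp and Cp_kfirst, which yields the
    // compacted pattern and values of C.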

    //--------------------------------------------------------------------------
    // create the result
    //--------------------------------------------------------------------------

    if (in_place_A)
    {

        //----------------------------------------------------------------------
        // transplant Cp, Ci, Cx back into A
        //----------------------------------------------------------------------

        // TODO: this is not parallel: use GB_hyper_prune
        if (A->h != NULL && C_nvec_nonempty < anvec)
        {
            // prune empty vectors from Ah and Ap
            int64_t cnvec = 0 ;
            for (int64_t k = 0 ; k < anvec ; k++)
            {
                if (Cp [k] < Cp [k+1])
                {
                    Ah [cnvec] = Ah [k] ;
                    Ap [cnvec] = Cp [k] ;
                    cnvec++ ;
                }
            }
            Ap [cnvec] = Cp [anvec] ;
            A->nvec = cnvec ;
            ASSERT (A->nvec == C_nvec_nonempty) ;
            GB_FREE (&Cp, Cp_size) ;
        }
        else
        {
            // free the old A->p and transplant in Cp as the new A->p
            GB_FREE (&Ap, Ap_size) ;
            A->p = Cp ; Cp = NULL ; A->p_size = Cp_size ;
        }

        ASSERT (Cp == NULL) ;

        GB_FREE (&Ai, Ai_size) ;
        GB_FREE (&Ax, Ax_size) ;
        A->i = Ci ; Ci = NULL ; A->i_size = Ci_size ;
        A->x = Cx ; Cx = NULL ; A->x_size = Cx_size ;
        A->nzmax = cnz ;
        A->nvec_nonempty = C_nvec_nonempty ;
        A->jumbled = A_jumbled ;        // A remains jumbled (in-place select)
        // the NONZOMBIE opcode may have removed all zombies, but A->nzombies
        // is still nonzero.  It is set to zero in GB_Matrix_wait.
        ASSERT_MATRIX_OK (A, "A output for GB_selector", GB_FLIP (GB0)) ;

        // positional selector (tril, triu, diag, offdiag, resize): not jumbled
        ASSERT (GB_IMPLIES (opcode <= GB_RESIZE_opcode, !GB_JUMBLED (A))) ;
        // entry selector: A can be returned as jumbled
        ASSERT (GB_IMPLIES (opcode >  GB_RESIZE_opcode, GB_JUMBLED_OK (A))) ;

    }
    else
    {

        //----------------------------------------------------------------------
        // create C and transplant Cp, Ch, Ci, Cx into C
        //----------------------------------------------------------------------

        int sparsity = (A->h != NULL) ? GxB_HYPERSPARSE : GxB_SPARSE ;
        ASSERT (C != NULL && C->static_header) ;
        info = GB_new (&C, true, // sparse or hyper (from A), static header
            A->type, avlen, avdim, GB_Ap_null, true,
            sparsity, A->hyper_switch, anvec, Context) ;
        ASSERT (info == GrB_SUCCESS) ;
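
        // GB_new is called with GB_Ap_null and a static header for C, so it
        // only fills in the header fields and should not need to allocate
        // anything; that is why an ASSERT suffices here instead of an error
        // check.  The arrays Cp, Ch, Ci, and Cx are transplanted into C just
        // below.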

        if (A->h != NULL)
        {

            //------------------------------------------------------------------
            // A and C are hypersparse: copy non-empty vectors from Ah to Ch
            //------------------------------------------------------------------

            Ch = GB_MALLOC (anvec, int64_t, &Ch_size) ;
            if (Ch == NULL)
            {
                // out of memory
                GB_FREE_ALL ;
                return (GrB_OUT_OF_MEMORY) ;
            }

            // TODO: do in parallel: use GB_hyper_prune
            int64_t cnvec = 0 ;
            for (int64_t k = 0 ; k < anvec ; k++)
            {
                if (Cp [k] < Cp [k+1])
                {
                    Ch [cnvec] = Ah [k] ;
                    Cp [cnvec] = Cp [k] ;
                    cnvec++ ;
                }
            }
            Cp [cnvec] = Cp [anvec] ;
            C->nvec = cnvec ;
            ASSERT (C->nvec == C_nvec_nonempty) ;
        }

        C->p = Cp ; Cp = NULL ; C->p_size = Cp_size ;
        C->h = Ch ; Ch = NULL ; C->h_size = Ch_size ;
        C->i = Ci ; Ci = NULL ; C->i_size = Ci_size ;
        C->x = Cx ; Cx = NULL ; C->x_size = Cx_size ;
        C->nzmax = cnz ;
        C->magic = GB_MAGIC ;
        C->nvec_nonempty = C_nvec_nonempty ;
        C->jumbled = A_jumbled ;    // C is jumbled if A is jumbled

        ASSERT_MATRIX_OK (C, "C output for GB_selector", GB0) ;

        // positional selector (tril, triu, diag, offdiag, resize): not jumbled
        ASSERT (GB_IMPLIES (opcode <= GB_RESIZE_opcode, !GB_JUMBLED (C))) ;
        // entry selector: C can be returned as jumbled
        ASSERT (GB_IMPLIES (opcode >  GB_RESIZE_opcode, GB_JUMBLED_OK (C))) ;
    }

    //--------------------------------------------------------------------------
    // free workspace and return result
    //--------------------------------------------------------------------------

    GB_FREE_WORK ;
    return (GrB_SUCCESS) ;
}