1 //------------------------------------------------------------------------------
2 // GB_ek_slice.h: slice the entries and vectors of a matrix
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 #ifndef GB_EK_SLICE_H
11 #define GB_EK_SLICE_H
12 #include "GB.h"
13 
14 //------------------------------------------------------------------------------
15 // GB_ek_slice_ntasks: determine # of threads and tasks to use for GB_ek_slice
16 //------------------------------------------------------------------------------
17 
GB_ek_slice_ntasks(int * nthreads,int * ntasks,GrB_Matrix A,int ntasks_per_thread,double work,double chunk,int nthreads_max)18 static inline void GB_ek_slice_ntasks
19 (
20     // output
21     int *nthreads,              // # of threads to use for GB_ek_slice
22     int *ntasks,                // # of tasks to create for GB_ek_slice
23     // input
24     GrB_Matrix A,               // matrix to slice
25     int ntasks_per_thread,      // # of tasks per thread
26     double work,                // total work to do
27     double chunk,               // give each thread at least this much work
28     int nthreads_max            // max # of threads to use
29 )
30 {
31     int64_t anz = GB_NNZ_HELD (A) ;
32     if (anz == 0)
33     {
34         (*nthreads) = 1 ;
35         (*ntasks) = 1 ;
36     }
37     else
38     {
39         (*nthreads) = GB_nthreads (work, chunk, nthreads_max) ;
40         (*ntasks) = (*nthreads == 1) ? 1 : ((ntasks_per_thread) * (*nthreads)) ;
41         (*ntasks) = GB_IMIN (*ntasks, anz) ;
42         (*ntasks) = GB_IMAX (*ntasks, 1) ;
43     }
44 }
45 
46 //------------------------------------------------------------------------------
47 // GB_SLICE_MATRIX: slice a single matrix using GB_ek_slice
48 //------------------------------------------------------------------------------
49 
50 #define GB_SLICE_MATRIX_WORK(X,NTASKS_PER_THREAD,chunk,work)                  \
51     GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), X,                \
52         NTASKS_PER_THREAD, work, chunk, nthreads_max) ;                       \
53     GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ;            \
54     if (X ## _ek_slicing == NULL)                                             \
55     {                                                                         \
56         /* out of memory */                                                   \
57         GB_FREE_ALL ;                                                         \
58         return (GrB_OUT_OF_MEMORY) ;                                          \
59     }                                                                         \
60     GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ;                         \
61     const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ;                 \
62     const int64_t *klast_  ## X ## slice = X ## _ek_slicing + X ## _ntasks ;  \
63     const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;
64 
65 #define GB_SLICE_MATRIX(X,NTASKS_PER_THREAD,chunk)                            \
66     GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), X,                \
67         NTASKS_PER_THREAD, GB_NNZ_HELD (X) + X->nvec, chunk, nthreads_max) ;  \
68     GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ;            \
69     if (X ## _ek_slicing == NULL)                                             \
70     {                                                                         \
71         /* out of memory */                                                   \
72         GB_FREE_ALL ;                                                         \
73         return (GrB_OUT_OF_MEMORY) ;                                          \
74     }                                                                         \
75     GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ;                         \
76     const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ;                 \
77     const int64_t *klast_  ## X ## slice = X ## _ek_slicing + X ## _ntasks ;  \
78     const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;
79 
80 //------------------------------------------------------------------------------
81 // GB_ek_slice prototypes
82 //------------------------------------------------------------------------------
83 
84 // Slice the entries of a matrix or vector into ntasks slices.
85 
86 // Task t does entries pstart_slice [t] to pstart_slice [t+1]-1 and
87 // vectors kfirst_slice [t] to klast_slice [t].  The first and last vectors
88 // may be shared with prior slices and subsequent slices.
89 
90 // On input, ntasks must be <= nnz (A), unless nnz (A) is zero.  In that
91 // case, ntasks must be 1.
92 
93 void GB_ek_slice            // slice a matrix
94 (
95     // output:
96     int64_t *restrict A_ek_slicing,  // size 3*ntasks+1
97     // input:
98     GrB_Matrix A,                       // matrix to slice
99     int ntasks                          // # of tasks
100 ) ;
101 
102 void GB_ek_slice_merge1     // merge column counts for the matrix C
103 (
104     // input/output:
105     int64_t *restrict Cp,                    // column counts
106     // input:
107     const int64_t *restrict Wfirst,          // size ntasks
108     const int64_t *restrict Wlast,           // size ntasks
109     const int64_t *ek_slicing,                  // size 3*ntasks+1
110     const int ntasks                            // # of tasks
111 ) ;
112 
113 void GB_ek_slice_merge2     // merge final results for matrix C
114 (
115     // output
116     int64_t *C_nvec_nonempty,           // # of non-empty vectors in C
117     int64_t *restrict Cp_kfirst,     // size ntasks
118     // input/output
119     int64_t *restrict Cp,            // size cnvec+1
120     // input
121     const int64_t cnvec,
122     const int64_t *restrict Wfirst,          // size ntasks
123     const int64_t *restrict Wlast,           // size ntasks
124     const int64_t *ek_slicing,                  // size 3*ntasks+1
125     const int ntasks,                   // # of tasks used to construct C
126     const int nthreads,                 // # of threads to use
127     GB_Context Context
128 ) ;
129 
130 //------------------------------------------------------------------------------
131 // GB_get_pA_and_pC: find the part of A(:,k) and C(:,k) for this task
132 //------------------------------------------------------------------------------
133 
134 // The tasks were generated by GB_ek_slice.
135 
GB_get_pA_and_pC(int64_t * pA_start,int64_t * pA_end,int64_t * pC,int tid,int64_t k,int64_t kfirst,int64_t klast,const int64_t * restrict pstart_slice,const int64_t * restrict Cp_kfirst,const int64_t * restrict Cp,int64_t cvlen,const int64_t * restrict Ap,int64_t avlen)136 static inline void GB_get_pA_and_pC
137 (
138     // output
139     int64_t *pA_start,
140     int64_t *pA_end,
141     int64_t *pC,
142     // input
143     int tid,            // task id
144     int64_t k,          // current vector
145     int64_t kfirst,     // first vector for this slice
146     int64_t klast,      // last vector for this slice
147     const int64_t *restrict pstart_slice,   // start of each slice in A
148     const int64_t *restrict Cp_kfirst,      // start of each slice in C
149     const int64_t *restrict Cp,             // vector pointers for C
150     int64_t cvlen,                             // C->vlen
151     const int64_t *restrict Ap,             // vector pointers for A
152     int64_t avlen                              // A->vlen
153 )
154 {
155 
156     int64_t p0 = GBP (Ap, k, avlen) ;
157     int64_t p1 = GBP (Ap, k+1, avlen) ;
158 
159     if (k == kfirst)
160     {
161         // First vector for task tid; may only be partially owned.
162         (*pA_start) = pstart_slice [tid] ;
163         (*pA_end  ) = GB_IMIN (p1, pstart_slice [tid+1]) ;
164         (*pC) = Cp_kfirst [tid] ;
165     }
166     else if (k == klast)
167     {
168         // Last vector for task tid; may only be partially owned.
169         (*pA_start) = p0 ;
170         (*pA_end  ) = pstart_slice [tid+1] ;
171         (*pC) = GBP (Cp, k, cvlen) ;
172     }
173     else
174     {
175         // task tid entirely owns this vector A(:,k).
176         (*pA_start) = p0 ;
177         (*pA_end  ) = p1 ;
178         (*pC) = GBP (Cp, k, cvlen) ;
179     }
180 }
181 
182 //------------------------------------------------------------------------------
183 // GB_get_pA: find the part of A(:,k) to be operated on by this task
184 //------------------------------------------------------------------------------
185 
186 // The tasks were generated by GB_ek_slice.
187 
GB_get_pA(int64_t * pA_start,int64_t * pA_end,int tid,int64_t k,int64_t kfirst,int64_t klast,const int64_t * restrict pstart_slice,const int64_t * restrict Ap,int64_t avlen)188 static inline void GB_get_pA
189 (
190     // output
191     int64_t *pA_start,
192     int64_t *pA_end,
193     // input
194     int tid,            // task id
195     int64_t k,          // current vector
196     int64_t kfirst,     // first vector for this slice
197     int64_t klast,      // last vector for this slice
198     const int64_t *restrict pstart_slice,   // start of each slice in A
199     const int64_t *restrict Ap,             // vector pointers for A
200     int64_t avlen                              // A->vlen
201 )
202 {
203 
204     int64_t p0 = GBP (Ap, k, avlen) ;
205     int64_t p1 = GBP (Ap, k+1, avlen) ;
206 
207     if (k == kfirst)
208     {
209         // First vector for task tid; may only be partially owned.
210         (*pA_start) = pstart_slice [tid] ;
211         (*pA_end  ) = GB_IMIN (p1, pstart_slice [tid+1]) ;
212     }
213     else if (k == klast)
214     {
215         // Last vector for task tid; may only be partially owned.
216         (*pA_start) = p0 ;
217         (*pA_end  ) = pstart_slice [tid+1] ;
218     }
219     else
220     {
221         // task tid entirely owns this vector A(:,k).
222         (*pA_start) = p0 ;
223         (*pA_end  ) = p1 ;
224     }
225 }
226 
227 #endif
228 
229