1 //------------------------------------------------------------------------------
2 // GB_ek_slice.h: slice the entries and vectors of a matrix
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 #ifndef GB_EK_SLICE_H
11 #define GB_EK_SLICE_H
12 #include "GB.h"
13
14 //------------------------------------------------------------------------------
15 // GB_ek_slice_ntasks: determine # of threads and tasks to use for GB_ek_slice
16 //------------------------------------------------------------------------------
17
GB_ek_slice_ntasks(int * nthreads,int * ntasks,GrB_Matrix A,int ntasks_per_thread,double work,double chunk,int nthreads_max)18 static inline void GB_ek_slice_ntasks
19 (
20 // output
21 int *nthreads, // # of threads to use for GB_ek_slice
22 int *ntasks, // # of tasks to create for GB_ek_slice
23 // input
24 GrB_Matrix A, // matrix to slice
25 int ntasks_per_thread, // # of tasks per thread
26 double work, // total work to do
27 double chunk, // give each thread at least this much work
28 int nthreads_max // max # of threads to use
29 )
30 {
31 int64_t anz = GB_NNZ_HELD (A) ;
32 if (anz == 0)
33 {
34 (*nthreads) = 1 ;
35 (*ntasks) = 1 ;
36 }
37 else
38 {
39 (*nthreads) = GB_nthreads (work, chunk, nthreads_max) ;
40 (*ntasks) = (*nthreads == 1) ? 1 : ((ntasks_per_thread) * (*nthreads)) ;
41 (*ntasks) = GB_IMIN (*ntasks, anz) ;
42 (*ntasks) = GB_IMAX (*ntasks, 1) ;
43 }
44 }
45
46 //------------------------------------------------------------------------------
47 // GB_SLICE_MATRIX: slice a single matrix using GB_ek_slice
48 //------------------------------------------------------------------------------
49
50 #define GB_SLICE_MATRIX_WORK(X,NTASKS_PER_THREAD,chunk,work) \
51 GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), X, \
52 NTASKS_PER_THREAD, work, chunk, nthreads_max) ; \
53 GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ; \
54 if (X ## _ek_slicing == NULL) \
55 { \
56 /* out of memory */ \
57 GB_FREE_ALL ; \
58 return (GrB_OUT_OF_MEMORY) ; \
59 } \
60 GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ; \
61 const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ; \
62 const int64_t *klast_ ## X ## slice = X ## _ek_slicing + X ## _ntasks ; \
63 const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;
64
65 #define GB_SLICE_MATRIX(X,NTASKS_PER_THREAD,chunk) \
66 GB_ek_slice_ntasks (&(X ## _nthreads), &(X ## _ntasks), X, \
67 NTASKS_PER_THREAD, GB_NNZ_HELD (X) + X->nvec, chunk, nthreads_max) ; \
68 GB_WERK_PUSH (X ## _ek_slicing, 3*(X ## _ntasks)+1, int64_t) ; \
69 if (X ## _ek_slicing == NULL) \
70 { \
71 /* out of memory */ \
72 GB_FREE_ALL ; \
73 return (GrB_OUT_OF_MEMORY) ; \
74 } \
75 GB_ek_slice (X ## _ek_slicing, X, X ## _ntasks) ; \
76 const int64_t *kfirst_ ## X ## slice = X ## _ek_slicing ; \
77 const int64_t *klast_ ## X ## slice = X ## _ek_slicing + X ## _ntasks ; \
78 const int64_t *pstart_ ## X ## slice = X ## _ek_slicing + X ## _ntasks*2 ;
79
80 //------------------------------------------------------------------------------
81 // GB_ek_slice prototypes
82 //------------------------------------------------------------------------------
83
84 // Slice the entries of a matrix or vector into ntasks slices.
85
86 // Task t does entries pstart_slice [t] to pstart_slice [t+1]-1 and
87 // vectors kfirst_slice [t] to klast_slice [t]. The first and last vectors
88 // may be shared with prior slices and subsequent slices.
89
90 // On input, ntasks must be <= nnz (A), unless nnz (A) is zero. In that
91 // case, ntasks must be 1.
92
93 void GB_ek_slice // slice a matrix
94 (
95 // output:
96 int64_t *restrict A_ek_slicing, // size 3*ntasks+1
97 // input:
98 GrB_Matrix A, // matrix to slice
99 int ntasks // # of tasks
100 ) ;
101
102 void GB_ek_slice_merge1 // merge column counts for the matrix C
103 (
104 // input/output:
105 int64_t *restrict Cp, // column counts
106 // input:
107 const int64_t *restrict Wfirst, // size ntasks
108 const int64_t *restrict Wlast, // size ntasks
109 const int64_t *ek_slicing, // size 3*ntasks+1
110 const int ntasks // # of tasks
111 ) ;
112
113 void GB_ek_slice_merge2 // merge final results for matrix C
114 (
115 // output
116 int64_t *C_nvec_nonempty, // # of non-empty vectors in C
117 int64_t *restrict Cp_kfirst, // size ntasks
118 // input/output
119 int64_t *restrict Cp, // size cnvec+1
120 // input
121 const int64_t cnvec,
122 const int64_t *restrict Wfirst, // size ntasks
123 const int64_t *restrict Wlast, // size ntasks
124 const int64_t *ek_slicing, // size 3*ntasks+1
125 const int ntasks, // # of tasks used to construct C
126 const int nthreads, // # of threads to use
127 GB_Context Context
128 ) ;
129
130 //------------------------------------------------------------------------------
131 // GB_get_pA_and_pC: find the part of A(:,k) and C(:,k) for this task
132 //------------------------------------------------------------------------------
133
134 // The tasks were generated by GB_ek_slice.
135
GB_get_pA_and_pC(int64_t * pA_start,int64_t * pA_end,int64_t * pC,int tid,int64_t k,int64_t kfirst,int64_t klast,const int64_t * restrict pstart_slice,const int64_t * restrict Cp_kfirst,const int64_t * restrict Cp,int64_t cvlen,const int64_t * restrict Ap,int64_t avlen)136 static inline void GB_get_pA_and_pC
137 (
138 // output
139 int64_t *pA_start,
140 int64_t *pA_end,
141 int64_t *pC,
142 // input
143 int tid, // task id
144 int64_t k, // current vector
145 int64_t kfirst, // first vector for this slice
146 int64_t klast, // last vector for this slice
147 const int64_t *restrict pstart_slice, // start of each slice in A
148 const int64_t *restrict Cp_kfirst, // start of each slice in C
149 const int64_t *restrict Cp, // vector pointers for C
150 int64_t cvlen, // C->vlen
151 const int64_t *restrict Ap, // vector pointers for A
152 int64_t avlen // A->vlen
153 )
154 {
155
156 int64_t p0 = GBP (Ap, k, avlen) ;
157 int64_t p1 = GBP (Ap, k+1, avlen) ;
158
159 if (k == kfirst)
160 {
161 // First vector for task tid; may only be partially owned.
162 (*pA_start) = pstart_slice [tid] ;
163 (*pA_end ) = GB_IMIN (p1, pstart_slice [tid+1]) ;
164 (*pC) = Cp_kfirst [tid] ;
165 }
166 else if (k == klast)
167 {
168 // Last vector for task tid; may only be partially owned.
169 (*pA_start) = p0 ;
170 (*pA_end ) = pstart_slice [tid+1] ;
171 (*pC) = GBP (Cp, k, cvlen) ;
172 }
173 else
174 {
175 // task tid entirely owns this vector A(:,k).
176 (*pA_start) = p0 ;
177 (*pA_end ) = p1 ;
178 (*pC) = GBP (Cp, k, cvlen) ;
179 }
180 }
181
182 //------------------------------------------------------------------------------
183 // GB_get_pA: find the part of A(:,k) to be operated on by this task
184 //------------------------------------------------------------------------------
185
186 // The tasks were generated by GB_ek_slice.
187
GB_get_pA(int64_t * pA_start,int64_t * pA_end,int tid,int64_t k,int64_t kfirst,int64_t klast,const int64_t * restrict pstart_slice,const int64_t * restrict Ap,int64_t avlen)188 static inline void GB_get_pA
189 (
190 // output
191 int64_t *pA_start,
192 int64_t *pA_end,
193 // input
194 int tid, // task id
195 int64_t k, // current vector
196 int64_t kfirst, // first vector for this slice
197 int64_t klast, // last vector for this slice
198 const int64_t *restrict pstart_slice, // start of each slice in A
199 const int64_t *restrict Ap, // vector pointers for A
200 int64_t avlen // A->vlen
201 )
202 {
203
204 int64_t p0 = GBP (Ap, k, avlen) ;
205 int64_t p1 = GBP (Ap, k+1, avlen) ;
206
207 if (k == kfirst)
208 {
209 // First vector for task tid; may only be partially owned.
210 (*pA_start) = pstart_slice [tid] ;
211 (*pA_end ) = GB_IMIN (p1, pstart_slice [tid+1]) ;
212 }
213 else if (k == klast)
214 {
215 // Last vector for task tid; may only be partially owned.
216 (*pA_start) = p0 ;
217 (*pA_end ) = pstart_slice [tid+1] ;
218 }
219 else
220 {
221 // task tid entirely owns this vector A(:,k).
222 (*pA_start) = p0 ;
223 (*pA_end ) = p1 ;
224 }
225 }
226
227 #endif
228
229