1 //------------------------------------------------------------------------------
2 // GB_subassign_one_slice: slice the entries and vectors for subassign
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Constructs a set of tasks to compute C for a subassign method, based on
11 // slicing a single input matrix (M or A). Fine tasks must also find their
12 // location in their vector C(:,jC). Currently this method is only used to
13 // slice M, not A.
14
15 // This method is used by GB_subassign_05, 06n, and 07. Each of those methods
16 // apply this function to M, but they use TaskList[...].pA and pA_end to
17 // partition the matrix.
18
19 // ===================== ==============
20 // M cmp rpl acc A S method: action
21 // ===================== ==============
22 // M - - - - - 05: C(I,J)<M> = x for M
23 // M - - + - - 07: C(I,J)<M> += x for M
24 // M - - - A - 06n: C(I,J)<M> = A for M
25
26 // C: not bitmap
27
28 #include "GB_subassign_methods.h"
29
30 #undef GB_FREE_WORK
31 #define GB_FREE_WORK \
32 { \
33 GB_WERK_POP (Coarse, int64_t) ; \
34 }
35
36 #undef GB_FREE_ALL
37 #define GB_FREE_ALL \
38 { \
39 GB_FREE_WORK ; \
40 GB_FREE_WERK (&TaskList, TaskList_size) ; \
41 }
42
43 //------------------------------------------------------------------------------
44 // GB_subassign_one_slice
45 //------------------------------------------------------------------------------
46
GB_subassign_one_slice(GB_task_struct ** p_TaskList,size_t * p_TaskList_size,int * p_ntasks,int * p_nthreads,const GrB_Matrix C,const GrB_Index * I,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,GB_Context Context)47 GrB_Info GB_subassign_one_slice
48 (
49 // output:
50 GB_task_struct **p_TaskList, // array of structs
51 size_t *p_TaskList_size, // size of TaskList
52 int *p_ntasks, // # of tasks constructed
53 int *p_nthreads, // # of threads to use
54 // input:
55 const GrB_Matrix C, // output matrix C
56 const GrB_Index *I,
57 const int64_t nI,
58 const int Ikind,
59 const int64_t Icolon [3],
60 const GrB_Index *J,
61 const int64_t nJ,
62 const int Jkind,
63 const int64_t Jcolon [3],
64 const GrB_Matrix M, // matrix to slice
65 GB_Context Context
66 )
67 {
68
69 //--------------------------------------------------------------------------
70 // check inputs
71 //--------------------------------------------------------------------------
72
73 ASSERT (p_TaskList != NULL) ;
74 ASSERT (p_ntasks != NULL) ;
75 ASSERT (p_nthreads != NULL) ;
76 ASSERT_MATRIX_OK (C, "C for 1_slice", GB0) ;
77 ASSERT_MATRIX_OK (M, "M for 1_slice", GB0) ;
78
79 ASSERT (!GB_IS_BITMAP (C)) ;
80
81 ASSERT (!GB_JUMBLED (C)) ;
82 ASSERT (!GB_JUMBLED (M)) ;
83
84 (*p_TaskList ) = NULL ;
85 (*p_ntasks ) = 0 ;
86 (*p_nthreads ) = 1 ;
87
88 //--------------------------------------------------------------------------
89 // determine # of threads to use
90 //--------------------------------------------------------------------------
91
92 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
93
94 //--------------------------------------------------------------------------
95 // get M and C
96 //--------------------------------------------------------------------------
97
98 const int64_t *restrict Mp = M->p ;
99 const int64_t *restrict Mh = M->h ;
100 // const int8_t *restrict Mb = M->b ;
101 const int64_t *restrict Mi = M->i ;
102 const int64_t mnz = GB_NNZ_HELD (M) ;
103 const int64_t mnvec = M->nvec ;
104 const int64_t mvlen = M->vlen ;
105
106 const int64_t *restrict Cp = C->p ;
107 const int64_t *restrict Ch = C->h ;
108 const int64_t *restrict Ci = C->i ;
109 const bool C_is_hyper = (Ch != NULL) ;
110 const int64_t nzombies = C->nzombies ;
111 const int64_t Cnvec = C->nvec ;
112 const int64_t Cvlen = C->vlen ;
113
114 //--------------------------------------------------------------------------
115 // allocate the initial TaskList
116 //--------------------------------------------------------------------------
117
118 GB_WERK_DECLARE (Coarse, int64_t) ; // size ntasks1+1
119 int ntasks1 = 0 ;
120 int nthreads = GB_nthreads (mnz, chunk, nthreads_max) ;
121 GB_task_struct *restrict TaskList = NULL ; size_t TaskList_size = 0 ;
122 int max_ntasks = 0 ;
123 int ntasks = 0 ;
124 int ntasks0 = (nthreads == 1) ? 1 : (32 * nthreads) ;
125 GB_REALLOC_TASK_WERK (TaskList, ntasks0, max_ntasks) ;
126
127 //--------------------------------------------------------------------------
128 // check for quick return for a single task
129 //--------------------------------------------------------------------------
130
131 if (mnvec == 0 || ntasks0 == 1)
132 {
133 // construct a single coarse task that does all the work
134 TaskList [0].kfirst = 0 ;
135 TaskList [0].klast = mnvec-1 ;
136 (*p_TaskList ) = TaskList ;
137 (*p_TaskList_size) = TaskList_size ;
138 (*p_ntasks ) = (mnvec == 0) ? 0 : 1 ;
139 (*p_nthreads ) = 1 ;
140 return (GrB_SUCCESS) ;
141 }
142
143 //--------------------------------------------------------------------------
144 // determine # of threads and tasks for the subassign operation
145 //--------------------------------------------------------------------------
146
147 double target_task_size = ((double) mnz) / (double) (ntasks0) ;
148 target_task_size = GB_IMAX (target_task_size, chunk) ;
149 ntasks1 = ((double) mnz) / target_task_size ;
150 ntasks1 = GB_IMAX (ntasks1, 1) ;
151
152 //--------------------------------------------------------------------------
153 // slice the work into coarse tasks
154 //--------------------------------------------------------------------------
155
156 // M may be hypersparse, sparse, bitmap, or full
157 GB_WERK_PUSH (Coarse, ntasks1 + 1, int64_t) ;
158 if (Coarse == NULL)
159 {
160 // out of memory
161 GB_FREE_ALL ;
162 return (GrB_OUT_OF_MEMORY) ;
163 }
164 GB_pslice (Coarse, Mp, mnvec, ntasks1, false) ;
165
166 //--------------------------------------------------------------------------
167 // construct all tasks, both coarse and fine
168 //--------------------------------------------------------------------------
169
170 for (int t = 0 ; t < ntasks1 ; t++)
171 {
172
173 //----------------------------------------------------------------------
174 // coarse task computes C (I, J(k:klast)) = M (I, k:klast)
175 //----------------------------------------------------------------------
176
177 int64_t k = Coarse [t] ;
178 int64_t klast = Coarse [t+1] - 1 ;
179
180 if (k >= mnvec)
181 {
182
183 //------------------------------------------------------------------
184 // all tasks have been constructed
185 //------------------------------------------------------------------
186
187 break ;
188
189 }
190 else if (k < klast)
191 {
192
193 //------------------------------------------------------------------
194 // coarse task has 2 or more vectors
195 //------------------------------------------------------------------
196
197 // This is a non-empty coarse-grain task that does two or more
198 // entire vectors of M, vectors k:klast, inclusive.
199 GB_REALLOC_TASK_WERK (TaskList, ntasks + 1, max_ntasks) ;
200 TaskList [ntasks].kfirst = k ;
201 TaskList [ntasks].klast = klast ;
202 ntasks++ ;
203
204 }
205 else
206 {
207
208 //------------------------------------------------------------------
209 // coarse task has 0 or 1 vectors
210 //------------------------------------------------------------------
211
212 // As a coarse-grain task, this task is empty or does a single
213 // vector, k. Vector k must be removed from the work done by this
214 // and any other coarse-grain task, and split into one or more
215 // fine-grain tasks.
216
217 for (int tt = t ; tt < ntasks1 ; tt++)
218 {
219 // remove k from the initial slice tt
220 if (Coarse [tt] == k)
221 {
222 // remove k from task tt
223 Coarse [tt] = k+1 ;
224 }
225 else
226 {
227 // break, k not in task tt
228 break ;
229 }
230 }
231
232 //------------------------------------------------------------------
233 // get the vector of C
234 //------------------------------------------------------------------
235
236 ASSERT (k >= 0 && k < mnvec) ;
237 int64_t j = GBH (Mh, k) ;
238 ASSERT (j >= 0 && j < nJ) ;
239 int64_t GB_LOOKUP_jC ;
240
241 bool jC_dense = (pC_end - pC_start == Cvlen) ;
242
243 //------------------------------------------------------------------
244 // determine the # of fine-grain tasks to create for vector k
245 //------------------------------------------------------------------
246
247 int64_t mknz = (Mp == NULL) ? mvlen : (Mp [k+1] - Mp [k]) ;
248 int nfine = ((double) mknz) / target_task_size ;
249 nfine = GB_IMAX (nfine, 1) ;
250
251 // make the TaskList bigger, if needed
252 GB_REALLOC_TASK_WERK (TaskList, ntasks + nfine, max_ntasks) ;
253
254 //------------------------------------------------------------------
255 // create the fine-grain tasks
256 //------------------------------------------------------------------
257
258 if (nfine == 1)
259 {
260
261 //--------------------------------------------------------------
262 // this is a single coarse task for all of vector k
263 //--------------------------------------------------------------
264
265 TaskList [ntasks].kfirst = k ;
266 TaskList [ntasks].klast = k ;
267 ntasks++ ;
268
269 }
270 else
271 {
272
273 //--------------------------------------------------------------
274 // slice vector M(:,k) into nfine fine tasks
275 //--------------------------------------------------------------
276
277 ASSERT (ntasks < max_ntasks) ;
278
279 for (int tfine = 0 ; tfine < nfine ; tfine++)
280 {
281
282 // this fine task operates on vector M(:,k)
283 TaskList [ntasks].kfirst = k ;
284 TaskList [ntasks].klast = -1 ;
285
286 // slice M(:,k) for this task
287 int64_t p1, p2 ;
288 GB_PARTITION (p1, p2, mknz, tfine, nfine) ;
289 int64_t pM_start = GBP (Mp, k, mvlen) ;
290 int64_t pM = pM_start + p1 ;
291 int64_t pM_end = pM_start + p2 ;
292 TaskList [ntasks].pA = pM ;
293 TaskList [ntasks].pA_end = pM_end ;
294
295 if (jC_dense)
296 {
297 // do not slice C(:,jC) if it is dense
298 TaskList [ntasks].pC = pC_start ;
299 TaskList [ntasks].pC_end = pC_end ;
300 }
301 else
302 {
303 // find where this task starts and ends in C(:,jC)
304 int64_t iM_start = GBI (Mi, pM, mvlen) ;
305 int64_t iC1 = GB_ijlist (I, iM_start, Ikind, Icolon) ;
306 int64_t iM_end = GBI (Mi, pM_end-1, mvlen) ;
307 int64_t iC2 = GB_ijlist (I, iM_end, Ikind, Icolon) ;
308
309 // If I is an explicit list, it must be already sorted
310 // in ascending order, and thus iC1 <= iC2. If I is
311 // GB_ALL or GB_STRIDE with inc >= 0, then iC1 < iC2.
312 // But if inc < 0, then iC1 > iC2. iC_start and iC_end
313 // are used for a binary search bracket, so iC_start <=
314 // iC_end must hold.
315 int64_t iC_start = GB_IMIN (iC1, iC2) ;
316 int64_t iC_end = GB_IMAX (iC1, iC2) ;
317
318 // this task works on Ci,Cx [pC:pC_end-1]
319 int64_t pleft = pC_start ;
320 int64_t pright = pC_end - 1 ;
321 bool found, is_zombie ;
322 GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_start, Ci,
323 pleft, pright, found, nzombies, is_zombie) ;
324 TaskList [ntasks].pC = pleft ;
325
326 pleft = pC_start ;
327 pright = pC_end - 1 ;
328 GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_end, Ci,
329 pleft, pright, found, nzombies, is_zombie) ;
330 TaskList [ntasks].pC_end = (found) ? (pleft+1) : pleft ;
331 }
332
333 ASSERT (TaskList [ntasks].pA <= TaskList [ntasks].pA_end) ;
334 ASSERT (TaskList [ntasks].pC <= TaskList [ntasks].pC_end) ;
335 ntasks++ ;
336 }
337 }
338 }
339 }
340
341 ASSERT (ntasks <= max_ntasks) ;
342
343 //--------------------------------------------------------------------------
344 // free workspace and return result
345 //--------------------------------------------------------------------------
346
347 GB_FREE_WORK ;
348 (*p_TaskList ) = TaskList ;
349 (*p_TaskList_size) = TaskList_size ;
350 (*p_ntasks ) = ntasks ;
351 (*p_nthreads ) = nthreads ;
352 return (GrB_SUCCESS) ;
353 }
354
355