1 //------------------------------------------------------------------------------
2 // GB_split_sparse: split a sparse/hypersparse matrix into tiles
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 #define GB_FREE_WORK \
11 GB_WERK_POP (C_ek_slicing, int64_t) ; \
12 GB_FREE_WERK (&Wp, Wp_size) ;
13
14 #define GB_FREE_ALL \
15 GB_FREE_WORK ; \
16 GB_Matrix_free (&C) ;
17
18 #include "GB_split.h"
19
GB_split_sparse(GrB_Matrix * Tiles,const GrB_Index m,const GrB_Index n,const int64_t * restrict Tile_rows,const int64_t * restrict Tile_cols,const GrB_Matrix A,GB_Context Context)20 GrB_Info GB_split_sparse // split a sparse matrix
21 (
22 GrB_Matrix *Tiles, // 2D row-major array of size m-by-n
23 const GrB_Index m,
24 const GrB_Index n,
25 const int64_t *restrict Tile_rows, // size m+1
26 const int64_t *restrict Tile_cols, // size n+1
27 const GrB_Matrix A, // input matrix
28 GB_Context Context
29 )
30 {
31
32 //--------------------------------------------------------------------------
33 // get inputs
34 //--------------------------------------------------------------------------
35
36 GrB_Info info ;
37 int A_sparsity = GB_sparsity (A) ;
38 bool A_is_hyper = (A_sparsity == GxB_HYPERSPARSE) ;
39 ASSERT (A_is_hyper || A_sparsity == GxB_SPARSE) ;
40 GrB_Matrix C = NULL ;
41 GB_WERK_DECLARE (C_ek_slicing, int64_t) ;
42 ASSERT_MATRIX_OK (A, "A sparse for split", GB0) ;
43
44 int sparsity_control = A->sparsity ;
45 float hyper_switch = A->hyper_switch ;
46 bool csc = A->is_csc ;
47 GrB_Type atype = A->type ;
48 int64_t avlen = A->vlen ;
49 int64_t avdim = A->vdim ;
50 size_t asize = atype->size ;
51
52 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
53
54 int64_t nouter = csc ? n : m ;
55 int64_t ninner = csc ? m : n ;
56
57 const int64_t *Tile_vdim = csc ? Tile_cols : Tile_rows ;
58 const int64_t *Tile_vlen = csc ? Tile_rows : Tile_cols ;
59
60 int64_t anvec = A->nvec ;
61 const int64_t *restrict Ap = A->p ;
62 const int64_t *restrict Ah = A->h ;
63 const int64_t *restrict Ai = A->i ;
64
65 //--------------------------------------------------------------------------
66 // allocate workspace
67 //--------------------------------------------------------------------------
68
69 size_t Wp_size = 0 ;
70 int64_t *restrict Wp = NULL ;
71 Wp = GB_MALLOC_WERK (anvec, int64_t, &Wp_size) ;
72 if (Wp == NULL)
73 {
74 // out of memory
75 GB_FREE_ALL ;
76 return (GrB_OUT_OF_MEMORY) ;
77 }
78 GB_memcpy (Wp, Ap, anvec * sizeof (int64_t), nthreads_max) ;
79
80 //--------------------------------------------------------------------------
81 // split A into tiles
82 //--------------------------------------------------------------------------
83
84 int64_t akend = 0 ;
85
86 for (int64_t outer = 0 ; outer < nouter ; outer++)
87 {
88
89 //----------------------------------------------------------------------
90 // find the starting and ending vector of these tiles
91 //----------------------------------------------------------------------
92
93 // The tile appears in vectors avstart:avend-1 of A, and indices
94 // aistart:aiend-1.
95
96 const int64_t avstart = Tile_vdim [outer] ;
97 const int64_t avend = Tile_vdim [outer+1] ;
98 int64_t akstart = akend ;
99
100 if (A_is_hyper)
101 {
102 // A is hypersparse: look for vector avend in the A->h hyper list.
103 // The vectors to handle for this outer loop are in
104 // Ah [akstart:akend-1].
105 akend = akstart ;
106 int64_t pright = anvec - 1 ;
107 bool found ;
108 GB_SPLIT_BINARY_SEARCH (avend, Ah, akend, pright, found) ;
109 // printf ("anvec %ld akstart: %ld akend: %ld avend %ld\n",
110 // anvec, akstart, akend, avend) ;
111 ASSERT (GB_IMPLIES (akstart <= akend-1, Ah [akend-1] < avend)) ;
112 // for (int64_t k = akstart ; k < akend ; k++)
113 // {
114 // printf (" Ah [%ld] = %ld\n", k, Ah [k]) ;
115 // }
116 }
117 else
118 {
119 // A is sparse; the vectors to handle are akstart:akend-1
120 akend = avend ;
121 }
122
123 // # of vectors in all tiles in this outer loop
124 int64_t cnvec = akend - akstart ;
125 int nth = GB_nthreads (cnvec, chunk, nthreads_max) ;
126 // printf ("akend %ld\n", akend) ;
127
128 //----------------------------------------------------------------------
129 // create all tiles for vectors akstart:akend-1 in A
130 //----------------------------------------------------------------------
131
132 for (int64_t inner = 0 ; inner < ninner ; inner++)
133 {
134 // printf ("\ninner: %ld, cnvec %ld akstart %ld akend %ld\n",
135 // inner, cnvec, akstart, akend) ;
136
137 //------------------------------------------------------------------
138 // allocate C, C->p, and C->h for this tile
139 //------------------------------------------------------------------
140
141 const int64_t aistart = Tile_vlen [inner] ;
142 const int64_t aiend = Tile_vlen [inner+1] ;
143 const int64_t cvdim = avend - avstart ;
144 const int64_t cvlen = aiend - aistart ;
145
146 C = NULL ;
147 GB_OK (GB_new (&C, false, // new header
148 atype, cvlen, cvdim, GB_Ap_malloc, csc, A_sparsity,
149 hyper_switch, cnvec, Context)) ;
150 C->sparsity = sparsity_control ;
151 C->hyper_switch = hyper_switch ;
152 C->nvec = cnvec ;
153 int64_t *restrict Cp = C->p ;
154 int64_t *restrict Ch = C->h ;
155
156 //------------------------------------------------------------------
157 // determine the boundaries of this tile
158 //------------------------------------------------------------------
159
160 int64_t k ;
161 #pragma omp parallel for num_threads(nth) schedule(static)
162 for (k = akstart ; k < akend ; k++)
163 {
164 // printf ("look in kth vector of A: k = %ld\n", k) ;
165 int64_t pA = Wp [k] ;
166 int64_t pA_end = Ap [k+1] ;
167 int64_t aknz = pA_end - pA ;
168 // printf ("Wp [%ld] = %ld\n", k, Wp [k]) ;
169 // printf ("Ap [%ld+1] = %ld\n", k, Ap [k+1]) ;
170 // printf ("aknz %ld\n", aknz) ;
171 if (aknz == 0 || Ai [pA] >= aiend)
172 {
173 // this vector of C is empty
174 // printf ("empty\n") ;
175 }
176 else if (aknz > 256)
177 {
178 // use binary search to find aiend
179 bool found ;
180 int64_t pright = pA_end - 1 ;
181 GB_SPLIT_BINARY_SEARCH (aiend, Ai, pA, pright, found) ;
182 #ifdef GB_DEBUG
183 // check the results with a linear search
184 int64_t p2 = Wp [k] ;
185 for ( ; p2 < Ap [k+1] ; p2++)
186 {
187 if (Ai [p2] >= aiend) break ;
188 }
189 ASSERT (pA == p2) ;
190 #endif
191 }
192 else
193 {
194 // use a linear-time search to find aiend
195 for ( ; pA < pA_end ; pA++)
196 {
197 if (Ai [pA] >= aiend) break ;
198 }
199 #ifdef GB_DEBUG
200 // check the results with a binary search
201 bool found ;
202 int64_t p2 = Wp [k] ;
203 int64_t p2_end = Ap [k+1] - 1 ;
204 GB_SPLIT_BINARY_SEARCH (aiend, Ai, p2, p2_end, found) ;
205 ASSERT (pA == p2) ;
206 #endif
207 }
208 Cp [k-akstart] = (pA - Wp [k]) ; // # of entries in this vector
209 if (A_is_hyper)
210 {
211 Ch [k-akstart] = Ah [k] - avstart ;
212 }
213 // printf ("cknz is %ld\n", Cp [k-akstart]) ;
214 }
215
216 GB_cumsum (Cp, cnvec, &(C->nvec_nonempty), nth, Context) ;
217 int64_t cnz = Cp [cnvec] ;
218
219 // for (int64_t k = 0 ; k <= cnvec ; k++)
220 // {
221 // printf ("Cp [%ld] = %ld\n", k, Cp [k]) ;
222 // }
223
224 //------------------------------------------------------------------
225 // allocate C->i and C->x for this tile
226 //------------------------------------------------------------------
227
228 GB_OK (GB_bix_alloc (C, cnz, false, false, true, true, Context)) ;
229 int64_t *restrict Ci = C->i ;
230 // memset (Ci, 255, cnz * sizeof (int64_t)) ;
231
232 //------------------------------------------------------------------
233 // copy the tile from A into C
234 //------------------------------------------------------------------
235
236 int C_ntasks, C_nthreads ;
237 GB_SLICE_MATRIX (C, 8, chunk) ;
238 // printf ("C_ntasks %d C_nthreads %d\n", C_ntasks, C_nthreads) ;
239 // printf ("C->nzmax %ld C->nvec %ld\n", C->nzmax, C->nvec) ;
240
241 bool done = false ;
242
243 #ifndef GBCOMPACT
244 {
245 // no typecasting needed
246 switch (asize)
247 {
248 #define GB_COPY(pC,pA) Cx [pC] = Ax [pA]
249
250 case 1 : // uint8, int8, bool, or 1-byte user-defined
251 #define GB_CTYPE uint8_t
252 #include "GB_split_sparse_template.c"
253 break ;
254
255 case 2 : // uint16, int16, or 2-byte user-defined
256 #define GB_CTYPE uint16_t
257 #include "GB_split_sparse_template.c"
258 break ;
259
260 case 4 : // uint32, int32, float, or 4-byte user-defined
261 #define GB_CTYPE uint32_t
262 #include "GB_split_sparse_template.c"
263 break ;
264
265 case 8 : // uint64, int64, double, float complex,
266 // or 8-byte user defined
267 #define GB_CTYPE uint64_t
268 #include "GB_split_sparse_template.c"
269 break ;
270
271 case 16 : // double complex or 16-byte user-defined
272 #define GB_CTYPE uint64_t
273 #undef GB_COPY
274 #define GB_COPY(pC,pA) \
275 Cx [2*pC ] = Ax [2*pA ] ; \
276 Cx [2*pC+1] = Ax [2*pA+1] ;
277 #include "GB_split_sparse_template.c"
278 break ;
279
280 default:;
281 }
282 }
283 #endif
284
285 if (!done)
286 {
287 // user-defined types
288 #define GB_CTYPE GB_void
289 #undef GB_COPY
290 #define GB_COPY(pC,pA) \
291 memcpy (Cx + (pC)*asize, Ax + (pA)*asize, asize) ;
292 #include "GB_split_sparse_template.c"
293 }
294
295 //------------------------------------------------------------------
296 // free workspace
297 //------------------------------------------------------------------
298
299 GB_WERK_POP (C_ek_slicing, int64_t) ;
300
301 //------------------------------------------------------------------
302 // advance to the next tile
303 //------------------------------------------------------------------
304
305 if (inner < ninner - 1)
306 {
307 int64_t k ;
308 #pragma omp parallel for num_threads(nth) schedule(static)
309 for (k = akstart ; k < akend ; k++)
310 {
311 int64_t ck = k - akstart ;
312 int64_t cknz = Cp [ck+1] - Cp [ck] ;
313 Wp [k] += cknz ;
314 }
315 }
316
317 //------------------------------------------------------------------
318 // conform the tile and save it in the Tiles array
319 //------------------------------------------------------------------
320
321 C->magic = GB_MAGIC ;
322 ASSERT_MATRIX_OK (C, "C for GB_split", GB0) ;
323 GB_OK (GB_conform (C, Context)) ;
324 if (csc)
325 {
326 GB_TILE (Tiles, inner, outer) = C ;
327 }
328 else
329 {
330 GB_TILE (Tiles, outer, inner) = C ;
331 }
332 C = NULL ;
333 }
334 }
335
336 GB_FREE_WORK ;
337 return (GrB_SUCCESS) ;
338 }
339
340