1 //------------------------------------------------------------------------------
2 // GB_concat_sparse: concatenate an array of matrices into a sparse matrix
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
// GB_FREE_WORK: free all workspace owned by this method: the S array of
// temporary tiles (transposed/typecast or bitmap-converted copies of the
// input Tiles, created on demand below), the Work counter array, and the
// ek_slicing workspace.  Safe to invoke before any allocation has occurred
// (S may be NULL; GB_FREE_WERK and GB_WERK_POP tolerate empty state).
#define GB_FREE_WORK                        \
    if (S != NULL)                          \
    {                                       \
        for (int64_t k = 0 ; k < m * n ; k++)   \
        {                                   \
            GB_Matrix_free (&(S [k])) ;     \
        }                                   \
    }                                       \
    GB_FREE_WERK (&S, S_size) ;             \
    GB_FREE_WERK (&Work, Work_size) ;       \
    GB_WERK_POP (A_ek_slicing, int64_t) ;
21
// GB_FREE_ALL: free all workspace and the partially-built result C.  Used on
// every error path (out of memory, or any failing call wrapped in GB_OK).
#define GB_FREE_ALL         \
    GB_FREE_WORK ;          \
    GB_phbix_free (C) ;
25
26 #include "GB_concat.h"
27
// GB_concat_sparse: concatenate a 2D array of tiles into C, constructing C
// as a sparse (GxB_SPARSE) matrix.  The prior content of C is discarded,
// but its type, dimensions, orientation (by row/by column), and sparsity
// settings are preserved.  The method makes two passes over the tiles:
// pass 1 counts the entries each tile contributes to each vector of C, and
// pass 2 copies the entries into place using type-specific templates.

GrB_Info GB_concat_sparse           // concatenate into a sparse matrix
(
    GrB_Matrix C,                   // input/output matrix for results
    const int64_t cnz,              // # of entries in C
    const GrB_Matrix *Tiles,        // 2D row-major array of size m-by-n,
    const GrB_Index m,
    const GrB_Index n,
    const int64_t *restrict Tile_rows,  // size m+1
    const int64_t *restrict Tile_cols,  // size n+1
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // allocate C as a sparse matrix
    //--------------------------------------------------------------------------

    GrB_Info info ;
    GrB_Matrix A = NULL ;
    GB_WERK_DECLARE (A_ek_slicing, int64_t) ;
    // Work is a ninner-by-cvdim array, in row-major order.  In pass 1,
    // Work [inner*cvdim + j] is the # of entries that tile "inner"
    // contributes to vector j of C; after the cumulative sums below it
    // becomes the offset in Ci/Cx where that tile's entries for vector j
    // are placed.
    int64_t *Work = NULL ;
    size_t Work_size = 0 ;
    // S [k] holds a temporary copy of Tiles [k], if that tile had to be
    // transposed/typecast or converted from bitmap; NULL otherwise.  All
    // entries of S are freed by GB_FREE_WORK.
    GrB_Matrix *S = NULL ;
    size_t S_size = 0 ;

    // save the properties of C that must survive GB_phbix_free below
    GrB_Type ctype = C->type ;
    int64_t cvlen = C->vlen ;
    int64_t cvdim = C->vdim ;
    bool csc = C->is_csc ;
    size_t csize = ctype->size ;
    GB_Type_code ccode = ctype->code ;

    float hyper_switch = C->hyper_switch ;
    float bitmap_switch = C->bitmap_switch ;
    int sparsity_control = C->sparsity ;
    bool static_header = C->static_header ;
    // discard the old content of C and rebuild it as sparse, with room for
    // cnz entries; GB_Ap_malloc leaves C->p uninitialized (filled in below)
    GB_phbix_free (C) ;
    GB_OK (GB_new_bix (&C, static_header, // prior static or dynamic header
        ctype, cvlen, cvdim, GB_Ap_malloc, csc, GxB_SPARSE, false,
        hyper_switch, cvdim, cnz, true, Context)) ;
    C->bitmap_switch = bitmap_switch ;
    C->sparsity = sparsity_control ;
    // Cp and Ci are also used by the included templates in pass 2
    int64_t *restrict Cp = C->p ;
    int64_t *restrict Ci = C->i ;

    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;

    //--------------------------------------------------------------------------
    // allocate workspace
    //--------------------------------------------------------------------------

    // "outer" iterates across the vectors of C (tile columns if C is held
    // by column, tile rows otherwise); "inner" iterates over the tiles
    // stacked within each vector of C.
    int64_t nouter = csc ? n : m ;
    int64_t ninner = csc ? m : n ;
    Work = GB_CALLOC_WERK (ninner * cvdim, int64_t, &Work_size) ;
    S = GB_CALLOC_WERK (m * n, GrB_Matrix, &S_size) ;
    if (S == NULL || Work == NULL)
    {
        // out of memory
        GB_FREE_ALL ;
        return (GrB_OUT_OF_MEMORY) ;
    }

    //--------------------------------------------------------------------------
    // count entries in each vector of each tile
    //--------------------------------------------------------------------------

    for (int64_t outer = 0 ; outer < nouter ; outer++)
    {
        for (int64_t inner = 0 ; inner < ninner ; inner++)
        {

            //------------------------------------------------------------------
            // get the tile A; transpose and typecast, if needed
            //------------------------------------------------------------------

            A = csc ? GB_TILE (Tiles, inner, outer)
                    : GB_TILE (Tiles, outer, inner) ;
            GrB_Matrix T = NULL ;
            if (csc != A->is_csc)
            {
                // T = (ctype) A', not in-place, using a dynamic header
                GB_OK (GB_transpose (&T, ctype, csc, A,
                    NULL, NULL, NULL, false, Context)) ;
                // save T in array S so pass 2 reuses it and GB_FREE_WORK
                // frees it
                if (csc)
                {
                    GB_TILE (S, inner, outer) = T ;
                }
                else
                {
                    GB_TILE (S, outer, inner) = T ;
                }
                A = T ;
                GB_MATRIX_WAIT (A) ;
            }
            ASSERT (C->is_csc == A->is_csc) ;
            ASSERT (!GB_ANY_PENDING_WORK (A)) ;

            //------------------------------------------------------------------
            // ensure the tile is not bitmap
            //------------------------------------------------------------------

            if (GB_IS_BITMAP (A))
            {
                if (T == NULL)
                {
                    // copy A into T
                    GB_OK (GB_dup2 (&T, A, true, NULL, Context)) ;
                    // save T in array S
                    if (csc)
                    {
                        GB_TILE (S, inner, outer) = T ;
                    }
                    else
                    {
                        GB_TILE (S, outer, inner) = T ;
                    }
                }
                // convert T from bitmap to sparse
                GB_OK (GB_convert_bitmap_to_sparse (T, Context)) ;
                A = T ;
            }

            // A is now full, sparse, or hypersparse
            ASSERT (!GB_IS_BITMAP (A)) ;

            //------------------------------------------------------------------
            // log the # of entries in each vector of the tile A
            //------------------------------------------------------------------

            const int64_t anvec = A->nvec ;
            const int64_t avlen = A->vlen ;
            // vectors 0..anvec-1 of A land in vectors cvstart.. of C, so
            // W points at this tile's slice of the Work array
            int64_t cvstart = csc ? Tile_cols [outer] : Tile_rows [outer] ;
            int64_t *restrict W = Work + inner * cvdim + cvstart ;
            int nth = GB_nthreads (anvec, chunk, nthreads_max) ;
            if (GB_IS_FULL (A))
            {
                // A is full
                int64_t j ;
                #pragma omp parallel for num_threads(nth) schedule(static)
                for (j = 0 ; j < anvec ; j++)
                {
                    // W [j] = # of entries in A(:,j), which is just avlen
                    W [j] = avlen ;
                }
            }
            else
            {
                // A is sparse or hyper
                int64_t k ;
                int64_t *restrict Ah = A->h ;
                int64_t *restrict Ap = A->p ;
                #pragma omp parallel for num_threads(nth) schedule(static)
                for (k = 0 ; k < anvec ; k++)
                {
                    // W [j] = # of entries in A(:,j), the kth column of A
                    int64_t j = GBH (Ah, k) ;
                    W [j] = Ap [k+1] - Ap [k] ;
                }
            }
        }
    }

    //--------------------------------------------------------------------------
    // cumulative sum of entries in each tile
    //--------------------------------------------------------------------------

    int nth = GB_nthreads (ninner*cvdim, chunk, nthreads_max) ;
    int64_t k ;
    // for each vector k of C: replace the per-tile counts in Work with
    // running offsets (relative to the start of C(:,k)), and set Cp [k]
    // to the total # of entries in C(:,k)
    #pragma omp parallel for num_threads(nth) schedule(static)
    for (k = 0 ; k < cvdim ; k++)
    {
        int64_t s = 0 ;
        for (int64_t inner = 0 ; inner < ninner ; inner++)
        {
            int64_t p = inner * cvdim + k ;
            int64_t c = Work [p] ;
            Work [p] = s ;
            s += c ;
        }
        // total number of entries in C(:,k)
        Cp [k] = s ;
    }

    // Cp = cumsum (Cp): the usual sparse column-pointer array
    GB_cumsum (Cp, cvdim, &(C->nvec_nonempty), nthreads_max, Context) ;

    // make the per-tile offsets absolute: Work [inner*cvdim + k] is now the
    // position in Ci/Cx where tile "inner" starts writing into C(:,k)
    #pragma omp parallel for num_threads(nth) schedule(static)
    for (k = 0 ; k < cvdim ; k++)
    {
        int64_t pC = Cp [k] ;
        for (int64_t inner = 0 ; inner < ninner ; inner++)
        {
            int64_t p = inner * cvdim + k ;
            Work [p] += pC ;
        }
    }

    //--------------------------------------------------------------------------
    // concatenate all matrices into C
    //--------------------------------------------------------------------------

    for (int64_t outer = 0 ; outer < nouter ; outer++)
    {
        for (int64_t inner = 0 ; inner < ninner ; inner++)
        {

            //------------------------------------------------------------------
            // get the tile A, either the temporary matrix T or the original A
            //------------------------------------------------------------------

            A = csc ? GB_TILE (S, inner, outer)
                    : GB_TILE (S, outer, inner) ;
            if (A == NULL)
            {
                A = csc ? GB_TILE (Tiles, inner, outer)
                        : GB_TILE (Tiles, outer, inner) ;
            }

            ASSERT (!GB_IS_BITMAP (A)) ;
            ASSERT (C->is_csc == A->is_csc) ;
            ASSERT (!GB_ANY_PENDING_WORK (A)) ;
            GB_Type_code acode = A->type->code ;

            //------------------------------------------------------------------
            // determine where to place the tile in C
            //------------------------------------------------------------------

            // The tile A appears in vectors cvstart:cvend-1 of C, and indices
            // cistart:ciend-1.

            int64_t cvstart, cvend, cistart, ciend ;
            if (csc)
            {
                // C and A are held by column
                // Tiles is row-major and accessed in column order
                cvstart = Tile_cols [outer] ;
                cvend   = Tile_cols [outer+1] ;
                cistart = Tile_rows [inner] ;
                ciend   = Tile_rows [inner+1] ;
            }
            else
            {
                // C and A are held by row
                // Tiles is row-major and accessed in row order
                cvstart = Tile_rows [outer] ;
                cvend   = Tile_rows [outer+1] ;
                cistart = Tile_cols [inner] ;
                ciend   = Tile_cols [inner+1] ;
            }

            // get the workspace pointer array W for this tile
            int64_t *restrict W = Work + inner * cvdim + cvstart ;

            //------------------------------------------------------------------
            // slice the tile
            //------------------------------------------------------------------

            int64_t avdim = cvend - cvstart ;
            int64_t avlen = ciend - cistart ;
            ASSERT (avdim == A->vdim) ;
            ASSERT (avlen == A->vlen) ;
            int A_nthreads, A_ntasks ;
            const int64_t *restrict Ap = A->p ;
            const int64_t *restrict Ah = A->h ;
            const int64_t *restrict Ai = A->i ;
            // partition the entries of A into tasks (fills A_ek_slicing,
            // A_ntasks, A_nthreads, consumed by the templates below)
            GB_SLICE_MATRIX (A, 1, chunk) ;

            //------------------------------------------------------------------
            // copy the tile A into C
            //------------------------------------------------------------------

            // NOTE(review): the included template is expected to set "done"
            // when it handles the copy — see GB_concat_sparse_template.c

            bool done = false ;

            #ifndef GBCOMPACT
            if (ccode == acode)
            {
                // no typecasting needed: copy raw values of matching size,
                // using an unsigned integer type of the same width
                switch (csize)
                {
                    #define GB_COPY(pC,pA) Cx [pC] = Ax [pA]

                    case 1 : // uint8, int8, bool, or 1-byte user-defined
                        #define GB_CTYPE uint8_t
                        #include "GB_concat_sparse_template.c"
                        break ;

                    case 2 : // uint16, int16, or 2-byte user-defined
                        #define GB_CTYPE uint16_t
                        #include "GB_concat_sparse_template.c"
                        break ;

                    case 4 : // uint32, int32, float, or 4-byte user-defined
                        #define GB_CTYPE uint32_t
                        #include "GB_concat_sparse_template.c"
                        break ;

                    case 8 : // uint64, int64, double, float complex,
                             // or 8-byte user defined
                        #define GB_CTYPE uint64_t
                        #include "GB_concat_sparse_template.c"
                        break ;

                    case 16 : // double complex or 16-byte user-defined
                        // copy as two 8-byte words per entry
                        #define GB_CTYPE uint64_t
                        #undef  GB_COPY
                        #define GB_COPY(pC,pA)                      \
                            Cx [2*pC  ] = Ax [2*pA  ] ;             \
                            Cx [2*pC+1] = Ax [2*pA+1] ;
                        #include "GB_concat_sparse_template.c"
                        break ;

                    default:;
                }
            }
            #endif

            if (!done)
            {
                // with typecasting or user-defined types: copy each entry
                // through a cast function, one GB_void byte-blob at a time
                GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ;
                size_t asize = A->type->size ;
                #define GB_CTYPE GB_void
                #undef  GB_COPY
                #define GB_COPY(pC,pA)                                  \
                    cast_A_to_C (Cx + (pC)*csize, Ax + (pA)*asize, asize) ;
                #include "GB_concat_sparse_template.c"
            }

            // free the slicing workspace for this tile before the next one
            GB_WERK_POP (A_ek_slicing, int64_t) ;
        }
    }

    //--------------------------------------------------------------------------
    // free workspace and return result
    //--------------------------------------------------------------------------

    GB_FREE_WORK ;
    C->magic = GB_MAGIC ;       // C is now initialized and valid
    return (GrB_SUCCESS) ;
}
367
368