1 //------------------------------------------------------------------------------
2 // GB_concat_sparse: concatenate an array of matrices into a sparse matrix
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 #define GB_FREE_WORK                            \
11     if (S != NULL)                              \
12     {                                           \
13         for (int64_t k = 0 ; k < m * n ; k++)   \
14         {                                       \
15             GB_Matrix_free (&(S [k])) ;         \
16         }                                       \
17     }                                           \
18     GB_FREE_WERK (&S, S_size) ;                 \
19     GB_FREE_WERK (&Work, Work_size) ;           \
20     GB_WERK_POP (A_ek_slicing, int64_t) ;
21 
22 #define GB_FREE_ALL         \
23     GB_FREE_WORK ;          \
24     GB_phbix_free (C) ;
25 
26 #include "GB_concat.h"
27 
GB_concat_sparse(GrB_Matrix C,const int64_t cnz,const GrB_Matrix * Tiles,const GrB_Index m,const GrB_Index n,const int64_t * restrict Tile_rows,const int64_t * restrict Tile_cols,GB_Context Context)28 GrB_Info GB_concat_sparse           // concatenate into a sparse matrix
29 (
30     GrB_Matrix C,                   // input/output matrix for results
31     const int64_t cnz,              // # of entries in C
32     const GrB_Matrix *Tiles,        // 2D row-major array of size m-by-n,
33     const GrB_Index m,
34     const GrB_Index n,
35     const int64_t *restrict Tile_rows,  // size m+1
36     const int64_t *restrict Tile_cols,  // size n+1
37     GB_Context Context
38 )
39 {
40 
41     //--------------------------------------------------------------------------
42     // allocate C as a sparse matrix
43     //--------------------------------------------------------------------------
44 
45     GrB_Info info ;
46     GrB_Matrix A = NULL ;
47     GB_WERK_DECLARE (A_ek_slicing, int64_t) ;
48     int64_t *Work = NULL ;
49     size_t Work_size = 0 ;
50     GrB_Matrix *S = NULL ;
51     size_t S_size = 0 ;
52 
53     GrB_Type ctype = C->type ;
54     int64_t cvlen = C->vlen ;
55     int64_t cvdim = C->vdim ;
56     bool csc = C->is_csc ;
57     size_t csize = ctype->size ;
58     GB_Type_code ccode = ctype->code ;
59 
60     float hyper_switch = C->hyper_switch ;
61     float bitmap_switch = C->bitmap_switch ;
62     int sparsity_control = C->sparsity ;
63     bool static_header = C->static_header ;
64     GB_phbix_free (C) ;
65     GB_OK (GB_new_bix (&C, static_header,   // prior static or dynamic header
66         ctype, cvlen, cvdim, GB_Ap_malloc, csc, GxB_SPARSE, false,
67         hyper_switch, cvdim, cnz, true, Context)) ;
68     C->bitmap_switch = bitmap_switch ;
69     C->sparsity = sparsity_control ;
70     int64_t *restrict Cp = C->p ;
71     int64_t *restrict Ci = C->i ;
72 
73     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
74 
75     //--------------------------------------------------------------------------
76     // allocate workspace
77     //--------------------------------------------------------------------------
78 
79     int64_t nouter = csc ? n : m ;
80     int64_t ninner = csc ? m : n ;
81     Work = GB_CALLOC_WERK (ninner * cvdim, int64_t, &Work_size) ;
82     S = GB_CALLOC_WERK (m * n, GrB_Matrix, &S_size) ;
83     if (S == NULL || Work == NULL)
84     {
85         // out of memory
86         GB_FREE_ALL ;
87         return (GrB_OUT_OF_MEMORY) ;
88     }
89 
90     //--------------------------------------------------------------------------
91     // count entries in each vector of each tile
92     //--------------------------------------------------------------------------
93 
94     for (int64_t outer = 0 ; outer < nouter ; outer++)
95     {
96         for (int64_t inner = 0 ; inner < ninner ; inner++)
97         {
98 
99             //------------------------------------------------------------------
100             // get the tile A; transpose and typecast, if needed
101             //------------------------------------------------------------------
102 
103             A = csc ? GB_TILE (Tiles, inner, outer)
104                     : GB_TILE (Tiles, outer, inner) ;
105             GrB_Matrix T = NULL ;
106             if (csc != A->is_csc)
107             {
108                 // T = (ctype) A', not in-place, using a dynamic header
109                 GB_OK (GB_transpose (&T, ctype, csc, A,
110                     NULL, NULL, NULL, false, Context)) ;
111                 // save T in array S
112                 if (csc)
113                 {
114                     GB_TILE (S, inner, outer) = T ;
115                 }
116                 else
117                 {
118                     GB_TILE (S, outer, inner) = T ;
119                 }
120                 A = T ;
121                 GB_MATRIX_WAIT (A) ;
122             }
123             ASSERT (C->is_csc == A->is_csc) ;
124             ASSERT (!GB_ANY_PENDING_WORK (A)) ;
125 
126             //------------------------------------------------------------------
127             // ensure the tile is not bitmap
128             //------------------------------------------------------------------
129 
130             if (GB_IS_BITMAP (A))
131             {
132                 if (T == NULL)
133                 {
134                     // copy A into T
135                     GB_OK (GB_dup2 (&T, A, true, NULL, Context)) ;
136                     // save T in array S
137                     if (csc)
138                     {
139                         GB_TILE (S, inner, outer) = T ;
140                     }
141                     else
142                     {
143                         GB_TILE (S, outer, inner) = T ;
144                     }
145                 }
146                 // convert T from bitmap to sparse
147                 GB_OK (GB_convert_bitmap_to_sparse (T, Context)) ;
148                 A = T ;
149             }
150 
151             ASSERT (!GB_IS_BITMAP (A)) ;
152 
153             //------------------------------------------------------------------
154             // log the # of entries in each vector of the tile A
155             //------------------------------------------------------------------
156 
157             const int64_t anvec = A->nvec ;
158             const int64_t avlen = A->vlen ;
159             int64_t cvstart = csc ?  Tile_cols [outer] : Tile_rows [outer] ;
160             int64_t *restrict W = Work + inner * cvdim + cvstart ;
161             int nth = GB_nthreads (anvec, chunk, nthreads_max) ;
162             if (GB_IS_FULL (A))
163             {
164                 // A is full
165                 int64_t j ;
166                 #pragma omp parallel for num_threads(nth) schedule(static)
167                 for (j = 0 ; j < anvec ; j++)
168                 {
169                     // W [j] = # of entries in A(:,j), which is just avlen
170                     W [j] = avlen ;
171                 }
172             }
173             else
174             {
175                 // A is sparse or hyper
176                 int64_t k ;
177                 int64_t *restrict Ah = A->h ;
178                 int64_t *restrict Ap = A->p ;
179                 #pragma omp parallel for num_threads(nth) schedule(static)
180                 for (k = 0 ; k < anvec ; k++)
181                 {
182                     // W [j] = # of entries in A(:,j), the kth column of A
183                     int64_t j = GBH (Ah, k) ;
184                     W [j] = Ap [k+1] - Ap [k] ;
185                 }
186             }
187         }
188     }
189 
190     //--------------------------------------------------------------------------
191     // cumulative sum of entries in each tile
192     //--------------------------------------------------------------------------
193 
194     int nth = GB_nthreads (ninner*cvdim, chunk, nthreads_max) ;
195     int64_t k ;
196     #pragma omp parallel for num_threads(nth) schedule(static)
197     for (k = 0 ; k < cvdim ; k++)
198     {
199         int64_t s = 0 ;
200         for (int64_t inner = 0 ; inner < ninner ; inner++)
201         {
202             int64_t p = inner * cvdim + k ;
203             int64_t c = Work [p] ;
204             Work [p] = s ;
205             s += c ;
206         }
207         // total number of entries in C(:,k)
208         Cp [k] = s ;
209     }
210 
211     GB_cumsum (Cp, cvdim, &(C->nvec_nonempty), nthreads_max, Context) ;
212 
213     #pragma omp parallel for num_threads(nth) schedule(static)
214     for (k = 0 ; k < cvdim ; k++)
215     {
216         int64_t pC = Cp [k] ;
217         for (int64_t inner = 0 ; inner < ninner ; inner++)
218         {
219             int64_t p = inner * cvdim + k ;
220             Work [p] += pC ;
221         }
222     }
223 
224     //--------------------------------------------------------------------------
225     // concatenate all matrices into C
226     //--------------------------------------------------------------------------
227 
228     for (int64_t outer = 0 ; outer < nouter ; outer++)
229     {
230         for (int64_t inner = 0 ; inner < ninner ; inner++)
231         {
232 
233             //------------------------------------------------------------------
234             // get the tile A, either the temporary matrix T or the original A
235             //------------------------------------------------------------------
236 
237             A = csc ? GB_TILE (S, inner, outer)
238                     : GB_TILE (S, outer, inner) ;
239             if (A == NULL)
240             {
241                 A = csc ? GB_TILE (Tiles, inner, outer)
242                         : GB_TILE (Tiles, outer, inner) ;
243             }
244 
245             ASSERT (!GB_IS_BITMAP (A)) ;
246             ASSERT (C->is_csc == A->is_csc) ;
247             ASSERT (!GB_ANY_PENDING_WORK (A)) ;
248             GB_Type_code acode = A->type->code ;
249 
250             //------------------------------------------------------------------
251             // determine where to place the tile in C
252             //------------------------------------------------------------------
253 
254             // The tile A appears in vectors cvstart:cvend-1 of C, and indices
255             // cistart:ciend-1.
256 
257             int64_t cvstart, cvend, cistart, ciend ;
258             if (csc)
259             {
260                 // C and A are held by column
261                 // Tiles is row-major and accessed in column order
262                 cvstart = Tile_cols [outer] ;
263                 cvend   = Tile_cols [outer+1] ;
264                 cistart = Tile_rows [inner] ;
265                 ciend   = Tile_rows [inner+1] ;
266             }
267             else
268             {
269                 // C and A are held by row
270                 // Tiles is row-major and accessed in row order
271                 cvstart = Tile_rows [outer] ;
272                 cvend   = Tile_rows [outer+1] ;
273                 cistart = Tile_cols [inner] ;
274                 ciend   = Tile_cols [inner+1] ;
275             }
276 
277             // get the workspace pointer array W for this tile
278             int64_t *restrict W = Work + inner * cvdim + cvstart ;
279 
280             //------------------------------------------------------------------
281             // slice the tile
282             //------------------------------------------------------------------
283 
284             int64_t avdim = cvend - cvstart ;
285             int64_t avlen = ciend - cistart ;
286             ASSERT (avdim == A->vdim) ;
287             ASSERT (avlen == A->vlen) ;
288             int A_nthreads, A_ntasks ;
289             const int64_t *restrict Ap = A->p ;
290             const int64_t *restrict Ah = A->h ;
291             const int64_t *restrict Ai = A->i ;
292             GB_SLICE_MATRIX (A, 1, chunk) ;
293 
294             //------------------------------------------------------------------
295             // copy the tile A into C
296             //------------------------------------------------------------------
297 
298             bool done = false ;
299 
300             #ifndef GBCOMPACT
301                 if (ccode == acode)
302                 {
303                     // no typecasting needed
304                     switch (csize)
305                     {
306                         #define GB_COPY(pC,pA) Cx [pC] = Ax [pA]
307 
308                         case 1 : // uint8, int8, bool, or 1-byte user-defined
309                             #define GB_CTYPE uint8_t
310                             #include "GB_concat_sparse_template.c"
311                             break ;
312 
313                         case 2 : // uint16, int16, or 2-byte user-defined
314                             #define GB_CTYPE uint16_t
315                             #include "GB_concat_sparse_template.c"
316                             break ;
317 
318                         case 4 : // uint32, int32, float, or 4-byte user-defined
319                             #define GB_CTYPE uint32_t
320                             #include "GB_concat_sparse_template.c"
321                             break ;
322 
323                         case 8 : // uint64, int64, double, float complex,
324                                  // or 8-byte user defined
325                             #define GB_CTYPE uint64_t
326                             #include "GB_concat_sparse_template.c"
327                             break ;
328 
329                         case 16 : // double complex or 16-byte user-defined
330                             #define GB_CTYPE uint64_t
331                             #undef  GB_COPY
332                             #define GB_COPY(pC,pA)                      \
333                                 Cx [2*pC  ] = Ax [2*pA  ] ;             \
334                                 Cx [2*pC+1] = Ax [2*pA+1] ;
335                             #include "GB_concat_sparse_template.c"
336                             break ;
337 
338                         default:;
339                     }
340                 }
341             #endif
342 
343             if (!done)
344             {
345                 // with typecasting or user-defined types
346                 GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ;
347                 size_t asize = A->type->size ;
348                 #define GB_CTYPE GB_void
349                 #undef  GB_COPY
350                 #define GB_COPY(pC,pA)  \
351                     cast_A_to_C (Cx + (pC)*csize, Ax + (pA)*asize, asize) ;
352                 #include "GB_concat_sparse_template.c"
353             }
354 
355             GB_WERK_POP (A_ek_slicing, int64_t) ;
356         }
357     }
358 
359     //--------------------------------------------------------------------------
360     // free workspace and return result
361     //--------------------------------------------------------------------------
362 
363     GB_FREE_WORK ;
364     C->magic = GB_MAGIC ;
365     return (GrB_SUCCESS) ;
366 }
367 
368