1 //------------------------------------------------------------------------------
2 // GB_split_sparse: split a sparse/hypersparse matrix into tiles
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 #define GB_FREE_WORK                        \
11     GB_WERK_POP (C_ek_slicing, int64_t) ;   \
12     GB_FREE_WERK (&Wp, Wp_size) ;
13 
14 #define GB_FREE_ALL                         \
15     GB_FREE_WORK ;                          \
16     GB_Matrix_free (&C) ;
17 
18 #include "GB_split.h"
19 
GB_split_sparse(GrB_Matrix * Tiles,const GrB_Index m,const GrB_Index n,const int64_t * restrict Tile_rows,const int64_t * restrict Tile_cols,const GrB_Matrix A,GB_Context Context)20 GrB_Info GB_split_sparse            // split a sparse matrix
21 (
22     GrB_Matrix *Tiles,              // 2D row-major array of size m-by-n
23     const GrB_Index m,
24     const GrB_Index n,
25     const int64_t *restrict Tile_rows,  // size m+1
26     const int64_t *restrict Tile_cols,  // size n+1
27     const GrB_Matrix A,             // input matrix
28     GB_Context Context
29 )
30 {
31 
32     //--------------------------------------------------------------------------
33     // get inputs
34     //--------------------------------------------------------------------------
35 
36     GrB_Info info ;
37     int A_sparsity = GB_sparsity (A) ;
38     bool A_is_hyper = (A_sparsity == GxB_HYPERSPARSE) ;
39     ASSERT (A_is_hyper || A_sparsity == GxB_SPARSE) ;
40     GrB_Matrix C = NULL ;
41     GB_WERK_DECLARE (C_ek_slicing, int64_t) ;
42     ASSERT_MATRIX_OK (A, "A sparse for split", GB0) ;
43 
44     int sparsity_control = A->sparsity ;
45     float hyper_switch = A->hyper_switch ;
46     bool csc = A->is_csc ;
47     GrB_Type atype = A->type ;
48     int64_t avlen = A->vlen ;
49     int64_t avdim = A->vdim ;
50     size_t asize = atype->size ;
51 
52     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
53 
54     int64_t nouter = csc ? n : m ;
55     int64_t ninner = csc ? m : n ;
56 
57     const int64_t *Tile_vdim = csc ? Tile_cols : Tile_rows ;
58     const int64_t *Tile_vlen = csc ? Tile_rows : Tile_cols ;
59 
60     int64_t anvec = A->nvec ;
61     const int64_t *restrict Ap = A->p ;
62     const int64_t *restrict Ah = A->h ;
63     const int64_t *restrict Ai = A->i ;
64 
65     //--------------------------------------------------------------------------
66     // allocate workspace
67     //--------------------------------------------------------------------------
68 
69     size_t Wp_size = 0 ;
70     int64_t *restrict Wp = NULL ;
71     Wp = GB_MALLOC_WERK (anvec, int64_t, &Wp_size) ;
72     if (Wp == NULL)
73     {
74         // out of memory
75         GB_FREE_ALL ;
76         return (GrB_OUT_OF_MEMORY) ;
77     }
78     GB_memcpy (Wp, Ap, anvec * sizeof (int64_t), nthreads_max) ;
79 
80     //--------------------------------------------------------------------------
81     // split A into tiles
82     //--------------------------------------------------------------------------
83 
84     int64_t akend = 0 ;
85 
86     for (int64_t outer = 0 ; outer < nouter ; outer++)
87     {
88 
89         //----------------------------------------------------------------------
90         // find the starting and ending vector of these tiles
91         //----------------------------------------------------------------------
92 
93         // The tile appears in vectors avstart:avend-1 of A, and indices
94         // aistart:aiend-1.
95 
96         const int64_t avstart = Tile_vdim [outer] ;
97         const int64_t avend   = Tile_vdim [outer+1] ;
98         int64_t akstart = akend ;
99 
100         if (A_is_hyper)
101         {
102             // A is hypersparse: look for vector avend in the A->h hyper list.
103             // The vectors to handle for this outer loop are in
104             // Ah [akstart:akend-1].
105             akend = akstart ;
106             int64_t pright = anvec - 1 ;
107             bool found ;
108             GB_SPLIT_BINARY_SEARCH (avend, Ah, akend, pright, found) ;
109 //          printf ("anvec %ld akstart: %ld akend: %ld avend %ld\n",
110 //              anvec, akstart, akend, avend) ;
111             ASSERT (GB_IMPLIES (akstart <= akend-1, Ah [akend-1] < avend)) ;
112 //          for (int64_t k = akstart ; k < akend ; k++)
113 //          {
114 //              printf ("   Ah [%ld] = %ld\n", k, Ah [k]) ;
115 //          }
116         }
117         else
118         {
119             // A is sparse; the vectors to handle are akstart:akend-1
120             akend = avend ;
121         }
122 
123         // # of vectors in all tiles in this outer loop
124         int64_t cnvec = akend - akstart ;
125         int nth = GB_nthreads (cnvec, chunk, nthreads_max) ;
126 // printf ("akend %ld\n", akend) ;
127 
128         //----------------------------------------------------------------------
129         // create all tiles for vectors akstart:akend-1 in A
130         //----------------------------------------------------------------------
131 
132         for (int64_t inner = 0 ; inner < ninner ; inner++)
133         {
134 // printf ("\ninner: %ld, cnvec %ld akstart %ld akend %ld\n",
135     // inner, cnvec, akstart, akend) ;
136 
137             //------------------------------------------------------------------
138             // allocate C, C->p, and C->h for this tile
139             //------------------------------------------------------------------
140 
141             const int64_t aistart = Tile_vlen [inner] ;
142             const int64_t aiend   = Tile_vlen [inner+1] ;
143             const int64_t cvdim = avend - avstart ;
144             const int64_t cvlen = aiend - aistart ;
145 
146             C = NULL ;
147             GB_OK (GB_new (&C, false,      // new header
148                 atype, cvlen, cvdim, GB_Ap_malloc, csc, A_sparsity,
149                 hyper_switch, cnvec, Context)) ;
150             C->sparsity = sparsity_control ;
151             C->hyper_switch = hyper_switch ;
152             C->nvec = cnvec ;
153             int64_t *restrict Cp = C->p ;
154             int64_t *restrict Ch = C->h ;
155 
156             //------------------------------------------------------------------
157             // determine the boundaries of this tile
158             //------------------------------------------------------------------
159 
160             int64_t k ;
161             #pragma omp parallel for num_threads(nth) schedule(static)
162             for (k = akstart ; k < akend ; k++)
163             {
164                 // printf ("look in kth vector of A: k = %ld\n", k) ;
165                 int64_t pA = Wp [k] ;
166                 int64_t pA_end = Ap [k+1] ;
167                 int64_t aknz = pA_end - pA ;
168                 // printf ("Wp [%ld] = %ld\n", k, Wp [k]) ;
169                 // printf ("Ap [%ld+1] = %ld\n", k, Ap [k+1]) ;
170                 // printf ("aknz %ld\n", aknz) ;
171                 if (aknz == 0 || Ai [pA] >= aiend)
172                 {
173                     // this vector of C is empty
174                     // printf ("empty\n") ;
175                 }
176                 else if (aknz > 256)
177                 {
178                     // use binary search to find aiend
179                     bool found ;
180                     int64_t pright = pA_end - 1 ;
181                     GB_SPLIT_BINARY_SEARCH (aiend, Ai, pA, pright, found) ;
182                     #ifdef GB_DEBUG
183                     // check the results with a linear search
184                     int64_t p2 = Wp [k] ;
185                     for ( ; p2 < Ap [k+1] ; p2++)
186                     {
187                         if (Ai [p2] >= aiend) break ;
188                     }
189                     ASSERT (pA == p2) ;
190                     #endif
191                 }
192                 else
193                 {
194                     // use a linear-time search to find aiend
195                     for ( ; pA < pA_end ; pA++)
196                     {
197                         if (Ai [pA] >= aiend) break ;
198                     }
199                     #ifdef GB_DEBUG
200                     // check the results with a binary search
201                     bool found ;
202                     int64_t p2 = Wp [k] ;
203                     int64_t p2_end = Ap [k+1] - 1 ;
204                     GB_SPLIT_BINARY_SEARCH (aiend, Ai, p2, p2_end, found) ;
205                     ASSERT (pA == p2) ;
206                     #endif
207                 }
208                 Cp [k-akstart] = (pA - Wp [k]) ; // # of entries in this vector
209                 if (A_is_hyper)
210                 {
211                     Ch [k-akstart] = Ah [k] - avstart ;
212                 }
213                 // printf ("cknz is %ld\n", Cp [k-akstart]) ;
214             }
215 
216             GB_cumsum (Cp, cnvec, &(C->nvec_nonempty), nth, Context) ;
217             int64_t cnz = Cp [cnvec] ;
218 
219             // for (int64_t k = 0 ; k <= cnvec ; k++)
220             // {
221                 // printf ("Cp [%ld] = %ld\n", k, Cp [k]) ;
222             // }
223 
224             //------------------------------------------------------------------
225             // allocate C->i and C->x for this tile
226             //------------------------------------------------------------------
227 
228             GB_OK (GB_bix_alloc (C, cnz, false, false, true, true, Context)) ;
229             int64_t *restrict Ci = C->i ;
230             // memset (Ci, 255, cnz * sizeof (int64_t)) ;
231 
232             //------------------------------------------------------------------
233             // copy the tile from A into C
234             //------------------------------------------------------------------
235 
236             int C_ntasks, C_nthreads ;
237             GB_SLICE_MATRIX (C, 8, chunk) ;
238 // printf ("C_ntasks %d C_nthreads %d\n", C_ntasks, C_nthreads) ;
239 // printf ("C->nzmax %ld C->nvec %ld\n", C->nzmax, C->nvec) ;
240 
241             bool done = false ;
242 
243             #ifndef GBCOMPACT
244             {
245                 // no typecasting needed
246                 switch (asize)
247                 {
248                     #define GB_COPY(pC,pA) Cx [pC] = Ax [pA]
249 
250                     case 1 : // uint8, int8, bool, or 1-byte user-defined
251                         #define GB_CTYPE uint8_t
252                         #include "GB_split_sparse_template.c"
253                         break ;
254 
255                     case 2 : // uint16, int16, or 2-byte user-defined
256                         #define GB_CTYPE uint16_t
257                         #include "GB_split_sparse_template.c"
258                         break ;
259 
260                     case 4 : // uint32, int32, float, or 4-byte user-defined
261                         #define GB_CTYPE uint32_t
262                         #include "GB_split_sparse_template.c"
263                         break ;
264 
265                     case 8 : // uint64, int64, double, float complex,
266                              // or 8-byte user defined
267                         #define GB_CTYPE uint64_t
268                         #include "GB_split_sparse_template.c"
269                         break ;
270 
271                     case 16 : // double complex or 16-byte user-defined
272                         #define GB_CTYPE uint64_t
273                         #undef  GB_COPY
274                         #define GB_COPY(pC,pA)                      \
275                             Cx [2*pC  ] = Ax [2*pA  ] ;             \
276                             Cx [2*pC+1] = Ax [2*pA+1] ;
277                         #include "GB_split_sparse_template.c"
278                         break ;
279 
280                     default:;
281                 }
282             }
283             #endif
284 
285             if (!done)
286             {
287                 // user-defined types
288                 #define GB_CTYPE GB_void
289                 #undef  GB_COPY
290                 #define GB_COPY(pC,pA)  \
291                     memcpy (Cx + (pC)*asize, Ax + (pA)*asize, asize) ;
292                 #include "GB_split_sparse_template.c"
293             }
294 
295             //------------------------------------------------------------------
296             // free workspace
297             //------------------------------------------------------------------
298 
299             GB_WERK_POP (C_ek_slicing, int64_t) ;
300 
301             //------------------------------------------------------------------
302             // advance to the next tile
303             //------------------------------------------------------------------
304 
305             if (inner < ninner - 1)
306             {
307                 int64_t k ;
308                 #pragma omp parallel for num_threads(nth) schedule(static)
309                 for (k = akstart ; k < akend ; k++)
310                 {
311                     int64_t ck = k - akstart ;
312                     int64_t cknz = Cp [ck+1] - Cp [ck] ;
313                     Wp [k] += cknz ;
314                 }
315             }
316 
317             //------------------------------------------------------------------
318             // conform the tile and save it in the Tiles array
319             //------------------------------------------------------------------
320 
321             C->magic = GB_MAGIC ;
322             ASSERT_MATRIX_OK (C, "C for GB_split", GB0) ;
323             GB_OK (GB_conform (C, Context)) ;
324             if (csc)
325             {
326                 GB_TILE (Tiles, inner, outer) = C ;
327             }
328             else
329             {
330                 GB_TILE (Tiles, outer, inner) = C ;
331             }
332             C = NULL ;
333         }
334     }
335 
336     GB_FREE_WORK ;
337     return (GrB_SUCCESS) ;
338 }
339 
340