1 //------------------------------------------------------------------------------
2 // GB_convert_bitmap_worker: construct triplets or CSC/CSR from bitmap
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // TODO allow this function to do typecasting.  Create 169 different versions
11 // for all 13x13 versions.  Use this as part of Method 24, C=A assignment.
12 // Can also use typecasting for GB_Matrix_diag.
13 
14 #include "GB.h"
15 #include "GB_partition.h"
16 
GB_convert_bitmap_worker(int64_t * restrict Ap,int64_t * restrict Ai,int64_t * restrict Aj,GB_void * restrict Ax_new,int64_t * anvec_nonempty,const GrB_Matrix A,GB_Context Context)17 GrB_Info GB_convert_bitmap_worker   // extract CSC/CSR or triplets from bitmap
18 (
19     // outputs:
20     int64_t *restrict Ap,           // vector pointers for CSC/CSR form
21     int64_t *restrict Ai,           // indices for CSC/CSR or triplet form
22     int64_t *restrict Aj,           // vector indices for triplet form
23     GB_void *restrict Ax_new,       // values for CSC/CSR or triplet form
24     int64_t *anvec_nonempty,        // # of non-empty vectors
25     // inputs: not modified
26     const GrB_Matrix A,             // matrix to extract; not modified
27     GB_Context Context
28 )
29 {
30 
31     //--------------------------------------------------------------------------
32     // check inputs
33     //--------------------------------------------------------------------------
34 
35     ASSERT (GB_IS_BITMAP (A)) ;
36     ASSERT (Ap != NULL) ;           // must be provided on input, size avdim+1
37 
38     int64_t *restrict W = NULL ; size_t W_size = 0 ;
39     const int64_t avdim = A->vdim ;
40     const int64_t avlen = A->vlen ;
41     const size_t asize = A->type->size ;
42 
43     //--------------------------------------------------------------------------
44     // count the entries in each vector
45     //--------------------------------------------------------------------------
46 
47     const int8_t *restrict Ab = A->b ;
48 
49     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
50     int nthreads = GB_nthreads (avlen*avdim, chunk, nthreads_max) ;
51     bool by_vector = (nthreads <= avdim) ;
52 
53     if (by_vector)
54     {
55 
56         //----------------------------------------------------------------------
57         // compute all vectors in parallel (no workspace)
58         //----------------------------------------------------------------------
59 
60         int64_t j ;
61         #pragma omp parallel for num_threads(nthreads) schedule(static)
62         for (j = 0 ; j < avdim ; j++)
63         {
64             // ajnz = nnz (A (:,j))
65             int64_t ajnz = 0 ;
66             int64_t pA_start = j * avlen ;
67             for (int64_t i = 0 ; i < avlen ; i++)
68             {
69                 // see if A(i,j) is present in the bitmap
70                 int64_t p = i + pA_start ;
71                 ajnz += Ab [p] ;
72                 ASSERT (Ab [p] == 0 || Ab [p] == 1) ;
73             }
74             Ap [j] = ajnz ;
75         }
76 
77     }
78     else
79     {
80 
81         //----------------------------------------------------------------------
82         // compute blocks of rows in parallel
83         //----------------------------------------------------------------------
84 
85         // allocate one row of W per thread, each row of length avdim
86         W = GB_MALLOC_WERK (nthreads * avdim, int64_t, &W_size) ;
87         if (W == NULL)
88         {
89             // out of memory
90             return (GrB_OUT_OF_MEMORY) ;
91         }
92 
93         int taskid ;
94         #pragma omp parallel for num_threads(nthreads) schedule(static)
95         for (taskid = 0 ; taskid < nthreads ; taskid++)
96         {
97             int64_t *restrict Wtask = W + taskid * avdim ;
98             int64_t istart, iend ;
99             GB_PARTITION (istart, iend, avlen, taskid, nthreads) ;
100             for (int64_t j = 0 ; j < avdim ; j++)
101             {
102                 // ajnz = nnz (A (istart:iend-1,j))
103                 int64_t ajnz = 0 ;
104                 int64_t pA_start = j * avlen ;
105                 for (int64_t i = istart ; i < iend ; i++)
106                 {
107                     // see if A(i,j) is present in the bitmap
108                     int64_t p = i + pA_start ;
109                     ajnz += Ab [p] ;
110                     ASSERT (Ab [p] == 0 || Ab [p] == 1) ;
111                 }
112                 Wtask [j] = ajnz ;
113             }
114         }
115 
116         // cumulative sum to compute nnz(A(:,j)) for each vector j
117         int64_t j ;
118         #pragma omp parallel for num_threads(nthreads) schedule(static)
119         for (j = 0 ; j < avdim ; j++)
120         {
121             int64_t ajnz = 0 ;
122             for (int taskid = 0 ; taskid < nthreads ; taskid++)
123             {
124                 int64_t *restrict Wtask = W + taskid * avdim ;
125                 int64_t c = Wtask [j] ;
126                 Wtask [j] = ajnz ;
127                 ajnz += c ;
128             }
129             Ap [j] = ajnz ;
130         }
131     }
132 
133     //--------------------------------------------------------------------------
134     // cumulative sum of Ap
135     //--------------------------------------------------------------------------
136 
137     int nth = GB_nthreads (avdim, chunk, nthreads_max) ;
138     GB_cumsum (Ap, avdim, anvec_nonempty, nth, Context) ;
139     int64_t anz = Ap [avdim] ;
140     ASSERT (anz == A->nvals) ;
141 
142     //--------------------------------------------------------------------------
143     // gather the pattern and values from the bitmap
144     //--------------------------------------------------------------------------
145 
146     // TODO: add type-specific versions for built-in types
147 
148     const GB_void *restrict Ax = (GB_void *) (A->x) ;
149 
150     if (by_vector)
151     {
152 
153         //----------------------------------------------------------------------
154         // construct all vectors in parallel (no workspace)
155         //----------------------------------------------------------------------
156 
157         int64_t j ;
158         #pragma omp parallel for num_threads(nthreads) schedule(static)
159         for (j = 0 ; j < avdim ; j++)
160         {
161             // gather from the bitmap into the new A (:,j)
162             int64_t pnew = Ap [j] ;
163             int64_t pA_start = j * avlen ;
164             for (int64_t i = 0 ; i < avlen ; i++)
165             {
166                 int64_t p = i + pA_start ;
167                 if (Ab [p])
168                 {
169                     // A(i,j) is in the bitmap
170                     if (Ai != NULL) Ai [pnew] = i ;
171                     if (Aj != NULL) Aj [pnew] = j ;
172                     if (Ax_new != NULL)
173                     {
174                         // Ax_new [pnew] = Ax [p])
175                         memcpy (Ax_new +(pnew)*asize, Ax +(p)*asize, asize) ;
176                     }
177                     pnew++ ;
178                 }
179             }
180             ASSERT (pnew == Ap [j+1]) ;
181         }
182 
183     }
184     else
185     {
186 
187         //----------------------------------------------------------------------
188         // compute blocks of rows in parallel
189         //----------------------------------------------------------------------
190 
191         int taskid ;
192         #pragma omp parallel for num_threads(nthreads) schedule(static)
193         for (taskid = 0 ; taskid < nthreads ; taskid++)
194         {
195             int64_t *restrict Wtask = W + taskid * avdim ;
196             int64_t istart, iend ;
197             GB_PARTITION (istart, iend, avlen, taskid, nthreads) ;
198             for (int64_t j = 0 ; j < avdim ; j++)
199             {
200                 // gather from the bitmap into the new A (:,j)
201                 int64_t pnew = Ap [j] + Wtask [j] ;
202                 int64_t pA_start = j * avlen ;
203                 for (int64_t i = istart ; i < iend ; i++)
204                 {
205                     // see if A(i,j) is present in the bitmap
206                     int64_t p = i + pA_start ;
207                     if (Ab [p])
208                     {
209                         // A(i,j) is in the bitmap
210                         if (Ai != NULL) Ai [pnew] = i ;
211                         if (Aj != NULL) Aj [pnew] = j ;
212                         if (Ax_new != NULL)
213                         {
214                             // Ax_new [pnew] = Ax [p] ;
215                             memcpy (Ax_new +(pnew)*asize, Ax +(p)*asize, asize);
216                         }
217                         pnew++ ;
218                     }
219                 }
220             }
221         }
222     }
223 
224     //--------------------------------------------------------------------------
225     // free workspace return result
226     //--------------------------------------------------------------------------
227 
228     GB_FREE_WERK (&W, W_size) ;
229     return (GrB_SUCCESS) ;
230 }
231 
232