1 //------------------------------------------------------------------------------
2 // GB_convert_bitmap_worker: construct triplets or CSC/CSR from bitmap
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // TODO allow this function to do typecasting. Create 169 different versions
11 // for all 13x13 versions. Use this as part of Method 24, C=A assignment.
12 // Can also use typecasting for GB_Matrix_diag.
13
14 #include "GB.h"
15 #include "GB_partition.h"
16
GB_convert_bitmap_worker(int64_t * restrict Ap,int64_t * restrict Ai,int64_t * restrict Aj,GB_void * restrict Ax_new,int64_t * anvec_nonempty,const GrB_Matrix A,GB_Context Context)17 GrB_Info GB_convert_bitmap_worker // extract CSC/CSR or triplets from bitmap
18 (
19 // outputs:
20 int64_t *restrict Ap, // vector pointers for CSC/CSR form
21 int64_t *restrict Ai, // indices for CSC/CSR or triplet form
22 int64_t *restrict Aj, // vector indices for triplet form
23 GB_void *restrict Ax_new, // values for CSC/CSR or triplet form
24 int64_t *anvec_nonempty, // # of non-empty vectors
25 // inputs: not modified
26 const GrB_Matrix A, // matrix to extract; not modified
27 GB_Context Context
28 )
29 {
30
31 //--------------------------------------------------------------------------
32 // check inputs
33 //--------------------------------------------------------------------------
34
35 ASSERT (GB_IS_BITMAP (A)) ;
36 ASSERT (Ap != NULL) ; // must be provided on input, size avdim+1
37
38 int64_t *restrict W = NULL ; size_t W_size = 0 ;
39 const int64_t avdim = A->vdim ;
40 const int64_t avlen = A->vlen ;
41 const size_t asize = A->type->size ;
42
43 //--------------------------------------------------------------------------
44 // count the entries in each vector
45 //--------------------------------------------------------------------------
46
47 const int8_t *restrict Ab = A->b ;
48
49 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
50 int nthreads = GB_nthreads (avlen*avdim, chunk, nthreads_max) ;
51 bool by_vector = (nthreads <= avdim) ;
52
53 if (by_vector)
54 {
55
56 //----------------------------------------------------------------------
57 // compute all vectors in parallel (no workspace)
58 //----------------------------------------------------------------------
59
60 int64_t j ;
61 #pragma omp parallel for num_threads(nthreads) schedule(static)
62 for (j = 0 ; j < avdim ; j++)
63 {
64 // ajnz = nnz (A (:,j))
65 int64_t ajnz = 0 ;
66 int64_t pA_start = j * avlen ;
67 for (int64_t i = 0 ; i < avlen ; i++)
68 {
69 // see if A(i,j) is present in the bitmap
70 int64_t p = i + pA_start ;
71 ajnz += Ab [p] ;
72 ASSERT (Ab [p] == 0 || Ab [p] == 1) ;
73 }
74 Ap [j] = ajnz ;
75 }
76
77 }
78 else
79 {
80
81 //----------------------------------------------------------------------
82 // compute blocks of rows in parallel
83 //----------------------------------------------------------------------
84
85 // allocate one row of W per thread, each row of length avdim
86 W = GB_MALLOC_WERK (nthreads * avdim, int64_t, &W_size) ;
87 if (W == NULL)
88 {
89 // out of memory
90 return (GrB_OUT_OF_MEMORY) ;
91 }
92
93 int taskid ;
94 #pragma omp parallel for num_threads(nthreads) schedule(static)
95 for (taskid = 0 ; taskid < nthreads ; taskid++)
96 {
97 int64_t *restrict Wtask = W + taskid * avdim ;
98 int64_t istart, iend ;
99 GB_PARTITION (istart, iend, avlen, taskid, nthreads) ;
100 for (int64_t j = 0 ; j < avdim ; j++)
101 {
102 // ajnz = nnz (A (istart:iend-1,j))
103 int64_t ajnz = 0 ;
104 int64_t pA_start = j * avlen ;
105 for (int64_t i = istart ; i < iend ; i++)
106 {
107 // see if A(i,j) is present in the bitmap
108 int64_t p = i + pA_start ;
109 ajnz += Ab [p] ;
110 ASSERT (Ab [p] == 0 || Ab [p] == 1) ;
111 }
112 Wtask [j] = ajnz ;
113 }
114 }
115
116 // cumulative sum to compute nnz(A(:,j)) for each vector j
117 int64_t j ;
118 #pragma omp parallel for num_threads(nthreads) schedule(static)
119 for (j = 0 ; j < avdim ; j++)
120 {
121 int64_t ajnz = 0 ;
122 for (int taskid = 0 ; taskid < nthreads ; taskid++)
123 {
124 int64_t *restrict Wtask = W + taskid * avdim ;
125 int64_t c = Wtask [j] ;
126 Wtask [j] = ajnz ;
127 ajnz += c ;
128 }
129 Ap [j] = ajnz ;
130 }
131 }
132
133 //--------------------------------------------------------------------------
134 // cumulative sum of Ap
135 //--------------------------------------------------------------------------
136
137 int nth = GB_nthreads (avdim, chunk, nthreads_max) ;
138 GB_cumsum (Ap, avdim, anvec_nonempty, nth, Context) ;
139 int64_t anz = Ap [avdim] ;
140 ASSERT (anz == A->nvals) ;
141
142 //--------------------------------------------------------------------------
143 // gather the pattern and values from the bitmap
144 //--------------------------------------------------------------------------
145
146 // TODO: add type-specific versions for built-in types
147
148 const GB_void *restrict Ax = (GB_void *) (A->x) ;
149
150 if (by_vector)
151 {
152
153 //----------------------------------------------------------------------
154 // construct all vectors in parallel (no workspace)
155 //----------------------------------------------------------------------
156
157 int64_t j ;
158 #pragma omp parallel for num_threads(nthreads) schedule(static)
159 for (j = 0 ; j < avdim ; j++)
160 {
161 // gather from the bitmap into the new A (:,j)
162 int64_t pnew = Ap [j] ;
163 int64_t pA_start = j * avlen ;
164 for (int64_t i = 0 ; i < avlen ; i++)
165 {
166 int64_t p = i + pA_start ;
167 if (Ab [p])
168 {
169 // A(i,j) is in the bitmap
170 if (Ai != NULL) Ai [pnew] = i ;
171 if (Aj != NULL) Aj [pnew] = j ;
172 if (Ax_new != NULL)
173 {
174 // Ax_new [pnew] = Ax [p])
175 memcpy (Ax_new +(pnew)*asize, Ax +(p)*asize, asize) ;
176 }
177 pnew++ ;
178 }
179 }
180 ASSERT (pnew == Ap [j+1]) ;
181 }
182
183 }
184 else
185 {
186
187 //----------------------------------------------------------------------
188 // compute blocks of rows in parallel
189 //----------------------------------------------------------------------
190
191 int taskid ;
192 #pragma omp parallel for num_threads(nthreads) schedule(static)
193 for (taskid = 0 ; taskid < nthreads ; taskid++)
194 {
195 int64_t *restrict Wtask = W + taskid * avdim ;
196 int64_t istart, iend ;
197 GB_PARTITION (istart, iend, avlen, taskid, nthreads) ;
198 for (int64_t j = 0 ; j < avdim ; j++)
199 {
200 // gather from the bitmap into the new A (:,j)
201 int64_t pnew = Ap [j] + Wtask [j] ;
202 int64_t pA_start = j * avlen ;
203 for (int64_t i = istart ; i < iend ; i++)
204 {
205 // see if A(i,j) is present in the bitmap
206 int64_t p = i + pA_start ;
207 if (Ab [p])
208 {
209 // A(i,j) is in the bitmap
210 if (Ai != NULL) Ai [pnew] = i ;
211 if (Aj != NULL) Aj [pnew] = j ;
212 if (Ax_new != NULL)
213 {
214 // Ax_new [pnew] = Ax [p] ;
215 memcpy (Ax_new +(pnew)*asize, Ax +(p)*asize, asize);
216 }
217 pnew++ ;
218 }
219 }
220 }
221 }
222 }
223
224 //--------------------------------------------------------------------------
225 // free workspace return result
226 //--------------------------------------------------------------------------
227
228 GB_FREE_WERK (&W, W_size) ;
229 return (GrB_SUCCESS) ;
230 }
231
232