1 /* @cond INNERDOC */
2 /**
3 * @file
4 * @brief Auxiliary functions.
5 */
6
7 /*
8
9 Copyright (C) 2008-2019 Michele Martone
10
11 This file is part of librsb.
12
13 librsb is free software; you can redistribute it and/or modify it
14 under the terms of the GNU Lesser General Public License as published
15 by the Free Software Foundation; either version 3 of the License, or
16 (at your option) any later version.
17
18 librsb is distributed in the hope that it will be useful, but WITHOUT
19 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
20 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public
24 License along with librsb; see the file COPYING.
25 If not, see <http://www.gnu.org/licenses/>.
26
27 */
28 /*
29 The code in this file was generated automatically by an M4 script.
30 It is not meant to be used as an API (Application Programming Interface).
31 p.s.: right now, only row major matrix access is considered.
32
33 */
34
35
#include "rsb_common.h"
/* RSB_PS_ASSERT: optional consistency check used by the merge routines below.
 * As shipped it expands to nothing on BOTH branches; to enable real checks,
 * uncomment the assert()-based definition in the #ifndef branch. */
#ifndef RSB_PS_ASSERT
/*#define RSB_PS_ASSERT(e) assert(e) */ /* uncomment this to use asserts */
#define RSB_PS_ASSERT(e) /* uncomment this to avoid asserts */
#else /* RSB_PS_ASSERT */
#undef RSB_PS_ASSERT
#define RSB_PS_ASSERT(e)
#endif /* RSB_PS_ASSERT */
rsb_err_t rsb__do_util_merge_sorted_subarrays_in_place_double(
	double *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_char_t * W,
	rsb_nnz_idx_t annz, rsb_nnz_idx_t bnnz,
	size_t wsize, rsb_flags_t flags
)
{
	/**
	 * \ingroup gr_util
	 * A           B
	 * +----------+----------+
	 * <- annz -> <- bnnz ->
	 *
	 * W
	 * +------------+
	 * <- wnnz ->
	 *
	 * Merges an array containing two ordered subarrays A and B,
	 * sized respectively with annz and bnnz elements, using a
	 * swap area sized wsize bytes.
	 *
	 * VA/IA/JA hold the values / row / column indices of the two
	 * concatenated COO runs (A first, B immediately after).
	 * W is a caller-provided scratch buffer of wsize bytes; it is
	 * partitioned below into parallel arrays VW | IW | JW of wnnz
	 * entries each.  flags is only forwarded to the RSB_PS_ASSERT
	 * checks, which are compiled out by default (see top of file).
	 * Always returns RSB_ERR_NO_ERROR: there is no failure path here.
	 *
	 * NOTE: this is NOT an optimized code, just a naive one to have this functionality working.
	 */
	rsb_int_t wpasses;
	rsb_nnz_idx_t wnnz,nnz=annz+bnnz;
	double *VW=NULL;
	rsb_coo_idx_t * IW=NULL;
	rsb_coo_idx_t * JW=NULL;
	double *VB=NULL;
	rsb_coo_idx_t * IB=NULL;
	rsb_coo_idx_t * JB=NULL;
	size_t el_size=sizeof(double);
	int step;
	rsb_nnz_idx_t aoff=0,boff=0,woff=0;

	/* number of (value,i,j) triples that fit in the wsize-byte swap area */
	wnnz=wsize/(el_size+2*sizeof(rsb_coo_idx_t));
	/* partition W into the three parallel scratch arrays VW | IW | JW */
	VW=(double*)W;
	W+=el_size*wnnz;
	IW=(rsb_coo_idx_t*)W;
	W+=sizeof(rsb_coo_idx_t)*wnnz;
	JW=(rsb_coo_idx_t*)W;

	/* subarray B begins right after the annz elements of A */
	VB=VA+annz;
	IB=IA+annz;
	JB=JA+annz;

	/* number of passes: ceil(nnz/wnnz); each pass emits (up to) wnnz merged elements */
	wpasses=(annz+bnnz+wnnz-1)/wnnz;

/* move one (value,i,j) triple from (VS,IS,JS)[soff] to (VD,ID,JD)[doff], advancing both offsets */
#define RSB_COO_MOVE(VD,ID,JD,VS,IS,JS,doff,soff) \
	VD[doff]=VS[soff], \
	ID[doff]=IS[soff], \
	JD[doff]=JS[soff],++soff,++doff

/* row-major order comparison: is (IA,JA)[aoff] strictly less than (IB,JB)[boff]? */
#define RSB_CMP_COO_LESS_THAN(IA,JA,IB,JB,aoff,boff) \
	IA[aoff]<IB[boff] || ( IA[aoff]==IB[boff] && JA[aoff] < JB[boff] )

	/* RSB_STDOUT(" * \n");*/
	/* RSB_STDOUT("wsize=%d steps:%d wnnz=%d nnz=%d\n",wsize,wpasses,wnnz,nnz);*/

	/* RSB_STDOUT("SSSSSSsentinel:%x %d %d\n",IA+annz+bnnz,IA[annz+bnnz],JA[annz+bnnz]);*/
	for(step=0;step<wpasses;++step)
	{
		rsb_nnz_idx_t cnnz;
		/* last pass: only the leftover elements remain to be merged */
		if(step==wpasses-1)
			wnnz=nnz-step*wnnz;

		/* cnnz is only referenced by the disabled debug printout below */
		cnnz=wnnz;
		cnnz = RSB_MIN(cnnz,annz);
		cnnz = RSB_MIN(cnnz,bnnz);
		/* RSB_STDOUT("step:%d wnnz=%d annz=%d bnnz=%d cnnz=%d\n",step,wnnz,annz,bnnz,cnnz);*/
		/* merge wnnz elements from A and B in W */
		woff=0;
		aoff=boff=0;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		/* RSB_STDOUT("SSSSsentinel:%x %d %d\n",IB+bnnz,IB[bnnz],JB[bnnz]); */
		/* element-wise merge until W is full or one of the two runs is exhausted */
		while(woff<wnnz && aoff<annz && boff<bnnz)
		{
			if(RSB_CMP_COO_LESS_THAN(IA,JA,IB,JB,aoff,boff))
				RSB_COO_MOVE(VW,IW,JW,VA,IA,JA,woff,aoff);
			else
				RSB_COO_MOVE(VW,IW,JW,VB,IB,JB,woff,boff);
		}
		/* RSB_STDOUT("aoff=%d boff=%d woff=%d\n",aoff,boff,woff);*/
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VW,IW,JW,woff,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		if(woff<wnnz)
		{
			/* one run is exhausted: bulk-copy the remainder of this pass from the other run */
			if(aoff==annz)
				RSB_COO_MEMMOVE(VW,IW,JW,VB,IB,JB,woff,boff,wnnz-woff,el_size),boff+=(wnnz-woff);
			else
			if(boff==bnnz)
				RSB_COO_MEMMOVE(VW,IW,JW,VA,IA,JA,woff,aoff,wnnz-woff,el_size),aoff+=(wnnz-woff);
		}
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VW,IW,JW,wnnz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		/* RSB_STDOUT("aoff:%d boff=%d wnnz=%d annz=%d\n",aoff,boff,wnnz,annz);*/
		/* memmove A boff places forward */
		/* shrink both runs by what this pass consumed */
		bnnz-=boff;
		annz-=aoff;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		/* RSB_STDOUT("SSSSsentinel:%x %d %d\n",IB+boff+bnnz,IB[boff+bnnz],JB[boff+bnnz]);*/
		/* shift the unconsumed tail of A (annz triples at offset aoff) up to offset wnnz,
		 * freeing the front of the array for the merged output of this pass */
		RSB_COO_MEMMOVE(VA,IA,JA,VA,IA,JA,wnnz,aoff,annz,el_size);
		/* RSB_STDOUT("PSSSsentinel:%x %d %d\n",IB+boff+bnnz,IB[boff+bnnz],JB[boff+bnnz]);*/
		/* advance B past its consumed prefix */
		VB+=boff;
		IB+=boff;
		JB+=boff;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		/* copy the wnnz merged triples from the swap area back to the output position */
		RSB_COO_MEMMOVE(VA,IA,JA,VW,IW,JW,0,0,wnnz,el_size);
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,wnnz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_DOUBLE ,NULL,flags));
		/* advance the output base past the freshly merged elements */
		VA+=wnnz;
		IA+=wnnz;
		JA+=wnnz;
	}
	return RSB_ERR_NO_ERROR;
#undef RSB_COO_MOVE
#undef RSB_CMP_COO_LESS_THAN
}
rsb_err_t rsb__do_util_merge_sorted_subarrays_in_place_float(
	float *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_char_t * W,
	rsb_nnz_idx_t annz, rsb_nnz_idx_t bnnz,
	size_t wsize, rsb_flags_t flags
)
{
	/**
	 * \ingroup gr_util
	 * A           B
	 * +----------+----------+
	 * <- annz -> <- bnnz ->
	 *
	 * W
	 * +------------+
	 * <- wnnz ->
	 *
	 * Merges an array containing two ordered subarrays A and B,
	 * sized respectively with annz and bnnz elements, using a
	 * swap area sized wsize bytes.
	 *
	 * VA/IA/JA hold the values / row / column indices of the two
	 * concatenated COO runs (A first, B immediately after).
	 * W is a caller-provided scratch buffer of wsize bytes; it is
	 * partitioned below into parallel arrays VW | IW | JW of wnnz
	 * entries each.  flags is only forwarded to the RSB_PS_ASSERT
	 * checks, which are compiled out by default (see top of file).
	 * Always returns RSB_ERR_NO_ERROR: there is no failure path here.
	 *
	 * NOTE: this is NOT an optimized code, just a naive one to have this functionality working.
	 */
	rsb_int_t wpasses;
	rsb_nnz_idx_t wnnz,nnz=annz+bnnz;
	float *VW=NULL;
	rsb_coo_idx_t * IW=NULL;
	rsb_coo_idx_t * JW=NULL;
	float *VB=NULL;
	rsb_coo_idx_t * IB=NULL;
	rsb_coo_idx_t * JB=NULL;
	size_t el_size=sizeof(float);
	int step;
	rsb_nnz_idx_t aoff=0,boff=0,woff=0;

	/* number of (value,i,j) triples that fit in the wsize-byte swap area */
	wnnz=wsize/(el_size+2*sizeof(rsb_coo_idx_t));
	/* partition W into the three parallel scratch arrays VW | IW | JW */
	VW=(float*)W;
	W+=el_size*wnnz;
	IW=(rsb_coo_idx_t*)W;
	W+=sizeof(rsb_coo_idx_t)*wnnz;
	JW=(rsb_coo_idx_t*)W;

	/* subarray B begins right after the annz elements of A */
	VB=VA+annz;
	IB=IA+annz;
	JB=JA+annz;

	/* number of passes: ceil(nnz/wnnz); each pass emits (up to) wnnz merged elements */
	wpasses=(annz+bnnz+wnnz-1)/wnnz;

/* move one (value,i,j) triple from (VS,IS,JS)[soff] to (VD,ID,JD)[doff], advancing both offsets */
#define RSB_COO_MOVE(VD,ID,JD,VS,IS,JS,doff,soff) \
	VD[doff]=VS[soff], \
	ID[doff]=IS[soff], \
	JD[doff]=JS[soff],++soff,++doff

/* row-major order comparison: is (IA,JA)[aoff] strictly less than (IB,JB)[boff]? */
#define RSB_CMP_COO_LESS_THAN(IA,JA,IB,JB,aoff,boff) \
	IA[aoff]<IB[boff] || ( IA[aoff]==IB[boff] && JA[aoff] < JB[boff] )

	/* RSB_STDOUT(" * \n");*/
	/* RSB_STDOUT("wsize=%d steps:%d wnnz=%d nnz=%d\n",wsize,wpasses,wnnz,nnz);*/

	/* RSB_STDOUT("SSSSSSsentinel:%x %d %d\n",IA+annz+bnnz,IA[annz+bnnz],JA[annz+bnnz]);*/
	for(step=0;step<wpasses;++step)
	{
		rsb_nnz_idx_t cnnz;
		/* last pass: only the leftover elements remain to be merged */
		if(step==wpasses-1)
			wnnz=nnz-step*wnnz;

		/* cnnz is only referenced by the disabled debug printout below */
		cnnz=wnnz;
		cnnz = RSB_MIN(cnnz,annz);
		cnnz = RSB_MIN(cnnz,bnnz);
		/* RSB_STDOUT("step:%d wnnz=%d annz=%d bnnz=%d cnnz=%d\n",step,wnnz,annz,bnnz,cnnz);*/
		/* merge wnnz elements from A and B in W */
		woff=0;
		aoff=boff=0;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		/* RSB_STDOUT("SSSSsentinel:%x %d %d\n",IB+bnnz,IB[bnnz],JB[bnnz]); */
		/* element-wise merge until W is full or one of the two runs is exhausted */
		while(woff<wnnz && aoff<annz && boff<bnnz)
		{
			if(RSB_CMP_COO_LESS_THAN(IA,JA,IB,JB,aoff,boff))
				RSB_COO_MOVE(VW,IW,JW,VA,IA,JA,woff,aoff);
			else
				RSB_COO_MOVE(VW,IW,JW,VB,IB,JB,woff,boff);
		}
		/* RSB_STDOUT("aoff=%d boff=%d woff=%d\n",aoff,boff,woff);*/
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VW,IW,JW,woff,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		if(woff<wnnz)
		{
			/* one run is exhausted: bulk-copy the remainder of this pass from the other run */
			if(aoff==annz)
				RSB_COO_MEMMOVE(VW,IW,JW,VB,IB,JB,woff,boff,wnnz-woff,el_size),boff+=(wnnz-woff);
			else
			if(boff==bnnz)
				RSB_COO_MEMMOVE(VW,IW,JW,VA,IA,JA,woff,aoff,wnnz-woff,el_size),aoff+=(wnnz-woff);
		}
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VW,IW,JW,wnnz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		/* RSB_STDOUT("aoff:%d boff=%d wnnz=%d annz=%d\n",aoff,boff,wnnz,annz);*/
		/* memmove A boff places forward */
		/* shrink both runs by what this pass consumed */
		bnnz-=boff;
		annz-=aoff;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		/* RSB_STDOUT("SSSSsentinel:%x %d %d\n",IB+boff+bnnz,IB[boff+bnnz],JB[boff+bnnz]);*/
		/* shift the unconsumed tail of A (annz triples at offset aoff) up to offset wnnz,
		 * freeing the front of the array for the merged output of this pass */
		RSB_COO_MEMMOVE(VA,IA,JA,VA,IA,JA,wnnz,aoff,annz,el_size);
		/* RSB_STDOUT("PSSSsentinel:%x %d %d\n",IB+boff+bnnz,IB[boff+bnnz],JB[boff+bnnz]);*/
		/* advance B past its consumed prefix */
		VB+=boff;
		IB+=boff;
		JB+=boff;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		/* copy the wnnz merged triples from the swap area back to the output position */
		RSB_COO_MEMMOVE(VA,IA,JA,VW,IW,JW,0,0,wnnz,el_size);
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,wnnz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_FLOAT ,NULL,flags));
		/* advance the output base past the freshly merged elements */
		VA+=wnnz;
		IA+=wnnz;
		JA+=wnnz;
	}
	return RSB_ERR_NO_ERROR;
#undef RSB_COO_MOVE
#undef RSB_CMP_COO_LESS_THAN
}
rsb_err_t rsb__do_util_merge_sorted_subarrays_in_place_float_complex(
	float complex *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_char_t * W,
	rsb_nnz_idx_t annz, rsb_nnz_idx_t bnnz,
	size_t wsize, rsb_flags_t flags
)
{
	/**
	 * \ingroup gr_util
	 * A           B
	 * +----------+----------+
	 * <- annz -> <- bnnz ->
	 *
	 * W
	 * +------------+
	 * <- wnnz ->
	 *
	 * Merges an array containing two ordered subarrays A and B,
	 * sized respectively with annz and bnnz elements, using a
	 * swap area sized wsize bytes.
	 *
	 * VA/IA/JA hold the values / row / column indices of the two
	 * concatenated COO runs (A first, B immediately after).
	 * W is a caller-provided scratch buffer of wsize bytes; it is
	 * partitioned below into parallel arrays VW | IW | JW of wnnz
	 * entries each.  flags is only forwarded to the RSB_PS_ASSERT
	 * checks, which are compiled out by default (see top of file).
	 * Always returns RSB_ERR_NO_ERROR: there is no failure path here.
	 *
	 * NOTE: this is NOT an optimized code, just a naive one to have this functionality working.
	 */
	rsb_int_t wpasses;
	rsb_nnz_idx_t wnnz,nnz=annz+bnnz;
	float complex *VW=NULL;
	rsb_coo_idx_t * IW=NULL;
	rsb_coo_idx_t * JW=NULL;
	float complex *VB=NULL;
	rsb_coo_idx_t * IB=NULL;
	rsb_coo_idx_t * JB=NULL;
	size_t el_size=sizeof(float complex);
	int step;
	rsb_nnz_idx_t aoff=0,boff=0,woff=0;

	/* number of (value,i,j) triples that fit in the wsize-byte swap area */
	wnnz=wsize/(el_size+2*sizeof(rsb_coo_idx_t));
	/* partition W into the three parallel scratch arrays VW | IW | JW */
	VW=(float complex*)W;
	W+=el_size*wnnz;
	IW=(rsb_coo_idx_t*)W;
	W+=sizeof(rsb_coo_idx_t)*wnnz;
	JW=(rsb_coo_idx_t*)W;

	/* subarray B begins right after the annz elements of A */
	VB=VA+annz;
	IB=IA+annz;
	JB=JA+annz;

	/* number of passes: ceil(nnz/wnnz); each pass emits (up to) wnnz merged elements */
	wpasses=(annz+bnnz+wnnz-1)/wnnz;

/* move one (value,i,j) triple from (VS,IS,JS)[soff] to (VD,ID,JD)[doff], advancing both offsets */
#define RSB_COO_MOVE(VD,ID,JD,VS,IS,JS,doff,soff) \
	VD[doff]=VS[soff], \
	ID[doff]=IS[soff], \
	JD[doff]=JS[soff],++soff,++doff

/* row-major order comparison: is (IA,JA)[aoff] strictly less than (IB,JB)[boff]? */
#define RSB_CMP_COO_LESS_THAN(IA,JA,IB,JB,aoff,boff) \
	IA[aoff]<IB[boff] || ( IA[aoff]==IB[boff] && JA[aoff] < JB[boff] )

	/* RSB_STDOUT(" * \n");*/
	/* RSB_STDOUT("wsize=%d steps:%d wnnz=%d nnz=%d\n",wsize,wpasses,wnnz,nnz);*/

	/* RSB_STDOUT("SSSSSSsentinel:%x %d %d\n",IA+annz+bnnz,IA[annz+bnnz],JA[annz+bnnz]);*/
	for(step=0;step<wpasses;++step)
	{
		rsb_nnz_idx_t cnnz;
		/* last pass: only the leftover elements remain to be merged */
		if(step==wpasses-1)
			wnnz=nnz-step*wnnz;

		/* cnnz is only referenced by the disabled debug printout below */
		cnnz=wnnz;
		cnnz = RSB_MIN(cnnz,annz);
		cnnz = RSB_MIN(cnnz,bnnz);
		/* RSB_STDOUT("step:%d wnnz=%d annz=%d bnnz=%d cnnz=%d\n",step,wnnz,annz,bnnz,cnnz);*/
		/* merge wnnz elements from A and B in W */
		woff=0;
		aoff=boff=0;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		/* RSB_STDOUT("SSSSsentinel:%x %d %d\n",IB+bnnz,IB[bnnz],JB[bnnz]); */
		/* element-wise merge until W is full or one of the two runs is exhausted */
		while(woff<wnnz && aoff<annz && boff<bnnz)
		{
			if(RSB_CMP_COO_LESS_THAN(IA,JA,IB,JB,aoff,boff))
				RSB_COO_MOVE(VW,IW,JW,VA,IA,JA,woff,aoff);
			else
				RSB_COO_MOVE(VW,IW,JW,VB,IB,JB,woff,boff);
		}
		/* RSB_STDOUT("aoff=%d boff=%d woff=%d\n",aoff,boff,woff);*/
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VW,IW,JW,woff,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		if(woff<wnnz)
		{
			/* one run is exhausted: bulk-copy the remainder of this pass from the other run */
			if(aoff==annz)
				RSB_COO_MEMMOVE(VW,IW,JW,VB,IB,JB,woff,boff,wnnz-woff,el_size),boff+=(wnnz-woff);
			else
			if(boff==bnnz)
				RSB_COO_MEMMOVE(VW,IW,JW,VA,IA,JA,woff,aoff,wnnz-woff,el_size),aoff+=(wnnz-woff);
		}
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VW,IW,JW,wnnz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		/* RSB_STDOUT("aoff:%d boff=%d wnnz=%d annz=%d\n",aoff,boff,wnnz,annz);*/
		/* memmove A boff places forward */
		/* shrink both runs by what this pass consumed */
		bnnz-=boff;
		annz-=aoff;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		/* RSB_STDOUT("SSSSsentinel:%x %d %d\n",IB+boff+bnnz,IB[boff+bnnz],JB[boff+bnnz]);*/
		/* shift the unconsumed tail of A (annz triples at offset aoff) up to offset wnnz,
		 * freeing the front of the array for the merged output of this pass */
		RSB_COO_MEMMOVE(VA,IA,JA,VA,IA,JA,wnnz,aoff,annz,el_size);
		/* RSB_STDOUT("PSSSsentinel:%x %d %d\n",IB+boff+bnnz,IB[boff+bnnz],JB[boff+bnnz]);*/
		/* advance B past its consumed prefix */
		VB+=boff;
		IB+=boff;
		JB+=boff;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		/* copy the wnnz merged triples from the swap area back to the output position */
		RSB_COO_MEMMOVE(VA,IA,JA,VW,IW,JW,0,0,wnnz,el_size);
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,wnnz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ,NULL,flags));
		/* advance the output base past the freshly merged elements */
		VA+=wnnz;
		IA+=wnnz;
		JA+=wnnz;
	}
	return RSB_ERR_NO_ERROR;
#undef RSB_COO_MOVE
#undef RSB_CMP_COO_LESS_THAN
}
rsb_err_t rsb__do_util_merge_sorted_subarrays_in_place_double_complex(
	double complex *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_char_t * W,
	rsb_nnz_idx_t annz, rsb_nnz_idx_t bnnz,
	size_t wsize, rsb_flags_t flags
)
{
	/**
	 * \ingroup gr_util
	 * A           B
	 * +----------+----------+
	 * <- annz -> <- bnnz ->
	 *
	 * W
	 * +------------+
	 * <- wnnz ->
	 *
	 * Merges an array containing two ordered subarrays A and B,
	 * sized respectively with annz and bnnz elements, using a
	 * swap area sized wsize bytes.
	 *
	 * VA/IA/JA hold the values / row / column indices of the two
	 * concatenated COO runs (A first, B immediately after).
	 * W is a caller-provided scratch buffer of wsize bytes; it is
	 * partitioned below into parallel arrays VW | IW | JW of wnnz
	 * entries each.  flags is only forwarded to the RSB_PS_ASSERT
	 * checks, which are compiled out by default (see top of file).
	 * Always returns RSB_ERR_NO_ERROR: there is no failure path here.
	 *
	 * NOTE: this is NOT an optimized code, just a naive one to have this functionality working.
	 */
	rsb_int_t wpasses;
	rsb_nnz_idx_t wnnz,nnz=annz+bnnz;
	double complex *VW=NULL;
	rsb_coo_idx_t * IW=NULL;
	rsb_coo_idx_t * JW=NULL;
	double complex *VB=NULL;
	rsb_coo_idx_t * IB=NULL;
	rsb_coo_idx_t * JB=NULL;
	size_t el_size=sizeof(double complex);
	int step;
	rsb_nnz_idx_t aoff=0,boff=0,woff=0;

	/* number of (value,i,j) triples that fit in the wsize-byte swap area */
	wnnz=wsize/(el_size+2*sizeof(rsb_coo_idx_t));
	/* partition W into the three parallel scratch arrays VW | IW | JW */
	VW=(double complex*)W;
	W+=el_size*wnnz;
	IW=(rsb_coo_idx_t*)W;
	W+=sizeof(rsb_coo_idx_t)*wnnz;
	JW=(rsb_coo_idx_t*)W;

	/* subarray B begins right after the annz elements of A */
	VB=VA+annz;
	IB=IA+annz;
	JB=JA+annz;

	/* number of passes: ceil(nnz/wnnz); each pass emits (up to) wnnz merged elements */
	wpasses=(annz+bnnz+wnnz-1)/wnnz;

/* move one (value,i,j) triple from (VS,IS,JS)[soff] to (VD,ID,JD)[doff], advancing both offsets */
#define RSB_COO_MOVE(VD,ID,JD,VS,IS,JS,doff,soff) \
	VD[doff]=VS[soff], \
	ID[doff]=IS[soff], \
	JD[doff]=JS[soff],++soff,++doff

/* row-major order comparison: is (IA,JA)[aoff] strictly less than (IB,JB)[boff]? */
#define RSB_CMP_COO_LESS_THAN(IA,JA,IB,JB,aoff,boff) \
	IA[aoff]<IB[boff] || ( IA[aoff]==IB[boff] && JA[aoff] < JB[boff] )

	/* RSB_STDOUT(" * \n");*/
	/* RSB_STDOUT("wsize=%d steps:%d wnnz=%d nnz=%d\n",wsize,wpasses,wnnz,nnz);*/

	/* RSB_STDOUT("SSSSSSsentinel:%x %d %d\n",IA+annz+bnnz,IA[annz+bnnz],JA[annz+bnnz]);*/
	for(step=0;step<wpasses;++step)
	{
		rsb_nnz_idx_t cnnz;
		/* last pass: only the leftover elements remain to be merged */
		if(step==wpasses-1)
			wnnz=nnz-step*wnnz;

		/* cnnz is only referenced by the disabled debug printout below */
		cnnz=wnnz;
		cnnz = RSB_MIN(cnnz,annz);
		cnnz = RSB_MIN(cnnz,bnnz);
		/* RSB_STDOUT("step:%d wnnz=%d annz=%d bnnz=%d cnnz=%d\n",step,wnnz,annz,bnnz,cnnz);*/
		/* merge wnnz elements from A and B in W */
		woff=0;
		aoff=boff=0;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		/* RSB_STDOUT("SSSSsentinel:%x %d %d\n",IB+bnnz,IB[bnnz],JB[bnnz]); */
		/* element-wise merge until W is full or one of the two runs is exhausted */
		while(woff<wnnz && aoff<annz && boff<bnnz)
		{
			if(RSB_CMP_COO_LESS_THAN(IA,JA,IB,JB,aoff,boff))
				RSB_COO_MOVE(VW,IW,JW,VA,IA,JA,woff,aoff);
			else
				RSB_COO_MOVE(VW,IW,JW,VB,IB,JB,woff,boff);
		}
		/* RSB_STDOUT("aoff=%d boff=%d woff=%d\n",aoff,boff,woff);*/
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VW,IW,JW,woff,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		if(woff<wnnz)
		{
			/* one run is exhausted: bulk-copy the remainder of this pass from the other run */
			if(aoff==annz)
				RSB_COO_MEMMOVE(VW,IW,JW,VB,IB,JB,woff,boff,wnnz-woff,el_size),boff+=(wnnz-woff);
			else
			if(boff==bnnz)
				RSB_COO_MEMMOVE(VW,IW,JW,VA,IA,JA,woff,aoff,wnnz-woff,el_size),aoff+=(wnnz-woff);
		}
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VW,IW,JW,wnnz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		/* RSB_STDOUT("aoff:%d boff=%d wnnz=%d annz=%d\n",aoff,boff,wnnz,annz);*/
		/* memmove A boff places forward */
		/* shrink both runs by what this pass consumed */
		bnnz-=boff;
		annz-=aoff;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		/* RSB_STDOUT("SSSSsentinel:%x %d %d\n",IB+boff+bnnz,IB[boff+bnnz],JB[boff+bnnz]);*/
		/* shift the unconsumed tail of A (annz triples at offset aoff) up to offset wnnz,
		 * freeing the front of the array for the merged output of this pass */
		RSB_COO_MEMMOVE(VA,IA,JA,VA,IA,JA,wnnz,aoff,annz,el_size);
		/* RSB_STDOUT("PSSSsentinel:%x %d %d\n",IB+boff+bnnz,IB[boff+bnnz],JB[boff+bnnz]);*/
		/* advance B past its consumed prefix */
		VB+=boff;
		IB+=boff;
		JB+=boff;
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VB,IB,JB,bnnz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		/* copy the wnnz merged triples from the swap area back to the output position */
		RSB_COO_MEMMOVE(VA,IA,JA,VW,IW,JW,0,0,wnnz,el_size);
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,wnnz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,annz,RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ,NULL,flags));
		/* advance the output base past the freshly merged elements */
		VA+=wnnz;
		IA+=wnnz;
		JA+=wnnz;
	}
	return RSB_ERR_NO_ERROR;
#undef RSB_COO_MOVE
#undef RSB_CMP_COO_LESS_THAN
}
rsb__do_util_merge_sorted_subarrays_in_place(void * VA,rsb_coo_idx_t * IA,rsb_coo_idx_t * JA,rsb_char_t * W,rsb_nnz_idx_t annz,rsb_nnz_idx_t bnnz,size_t wsize,rsb_flags_t flags,rsb_type_t typecode)520 rsb_err_t rsb__do_util_merge_sorted_subarrays_in_place(
521 void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_char_t * W,
522 rsb_nnz_idx_t annz, rsb_nnz_idx_t bnnz,
523 size_t wsize, rsb_flags_t flags, rsb_type_t typecode
524 )
525 {
526 switch(typecode)
527 {
528 case RSB_NUMERICAL_TYPE_DOUBLE :
529 return rsb__do_util_merge_sorted_subarrays_in_place_double(VA,IA,JA,W,annz,bnnz,wsize,flags);
530 break;
531 case RSB_NUMERICAL_TYPE_FLOAT :
532 return rsb__do_util_merge_sorted_subarrays_in_place_float(VA,IA,JA,W,annz,bnnz,wsize,flags);
533 break;
534 case RSB_NUMERICAL_TYPE_FLOAT_COMPLEX :
535 return rsb__do_util_merge_sorted_subarrays_in_place_float_complex(VA,IA,JA,W,annz,bnnz,wsize,flags);
536 break;
537 case RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX :
538 return rsb__do_util_merge_sorted_subarrays_in_place_double_complex(VA,IA,JA,W,annz,bnnz,wsize,flags);
539 break;
540 default :
541 return RSB_ERR_UNSUPPORTED_TYPE;
542 }
543 }
544
545 /* @endcond */
546