1
2
3 /* @cond INNERDOC */
4 /**
5 * @file
6 * @brief
7 * Sorting functions.
8 */
9
10 /*
11
12 Copyright (C) 2008-2020 Michele Martone
13
14 This file is part of librsb.
15
16 librsb is free software; you can redistribute it and/or modify it
17 under the terms of the GNU Lesser General Public License as published
18 by the Free Software Foundation; either version 3 of the License, or
19 (at your option) any later version.
20
21 librsb is distributed in the hope that it will be useful, but WITHOUT
22 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
24 License for more details.
25
26 You should have received a copy of the GNU Lesser General Public
27 License along with librsb; see the file COPYING.
28 If not, see <http://www.gnu.org/licenses/>.
29
30 */
31 /*
32 The code in this file was generated automatically by an M4 script.
33 It is not meant to be used as an API (Application Programming Interface).
34 p.s.: right now, only row major matrix access is considered.
35
36 */
37
38
39 #ifdef __cplusplus
40 extern "C" {
41 #endif /* __cplusplus */
42
43
44
45
46 #include "rsb.h"
47 #include "rsb_common.h"
48 #include "rsb_internals.h"
49
50
51 extern struct rsb_session_handle_t rsb_global_session_handle;
52
rsb__do_mergesort_CSR(rsb_coo_idx_t * iarray,rsb_coo_idx_t * jarray,void * array,rsb_nnz_idx_t length,rsb_coo_idx_t * iresult,rsb_coo_idx_t * jresult,void * result,rsb_type_t type)53 rsb_err_t rsb__do_mergesort_CSR(
54 rsb_coo_idx_t *iarray,
55 rsb_coo_idx_t *jarray,
56 void *array,
57 rsb_nnz_idx_t length,
58 rsb_coo_idx_t *iresult,
59 rsb_coo_idx_t *jresult,
60 void *result,
61 rsb_type_t type)
62 {
63 /*!
64 * \ingroup gr_util
65 * This function will sort the non zero elements of a sparse matrix.
66 * It will read row and column indices arrays, the values array,
67 * and will sort them in separate output arrays.
68 *
69 * \param length the input arrays length
70 * \param iarray the input row indices array
71 * \param jarray the input column indices array
72 * \param array the input mtype array
73 * \param iresult the output row indices array
74 * \param jresult the output column indices array
75 * \param result the output values array
76 * FIXME : UNDOCUMENTED
77 * Will sort thethree arrays (iarray, jarray, array) following the
78 * criteria :
79 *
80 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
81 * i.e.: C (row major) ordering
82 */
83
84
85 if(type == RSB_NUMERICAL_TYPE_DOUBLE )
86 return rsb_do_mergesort_double_CSR(iarray, jarray,
87 array, length,
88 iresult, jresult,
89 result);
90 else
91 if(type == RSB_NUMERICAL_TYPE_FLOAT )
92 return rsb_do_mergesort_float_CSR(iarray, jarray,
93 array, length,
94 iresult, jresult,
95 result);
96 else
97 if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
98 return rsb_do_mergesort_float_complex_CSR(iarray, jarray,
99 array, length,
100 iresult, jresult,
101 result);
102 else
103 if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
104 return rsb_do_mergesort_double_complex_CSR(iarray, jarray,
105 array, length,
106 iresult, jresult,
107 result);
108 else
109 return RSB_ERR_UNSUPPORTED_TYPE;
110 }
111
rsb__do_mergesort_BCSR(rsb_coo_idx_t * iarray,rsb_coo_idx_t * jarray,void * array,rsb_nnz_idx_t length,rsb_coo_idx_t mb,rsb_coo_idx_t kb,rsb_coo_idx_t * iresult,rsb_coo_idx_t * jresult,void * result,rsb_type_t type)112 rsb_err_t rsb__do_mergesort_BCSR(
113 rsb_coo_idx_t *iarray,
114 rsb_coo_idx_t *jarray,
115 void *array,
116 rsb_nnz_idx_t length,
117 rsb_coo_idx_t mb,
118 rsb_coo_idx_t kb,
119 rsb_coo_idx_t *iresult,
120 rsb_coo_idx_t *jresult,
121 void *result,
122 rsb_type_t type)
123 {
124 /*!
125 * \ingroup gr_util
126 * This function will sort the non zero elements of a sparse matrix.
127 * It will read row and column indices arrays, the values array,
128 * and will sort them in separate output arrays.
129 *
130 * \param length the input arrays length
131 * \param iarray the input row indices array
132 * \param jarray the input column indices array
133 * \param array the input mtype array
134 * \param iresult the output row indices array
135 * \param jresult the output column indices array
136 * \param result the output values array
137
138 * FIXME : UNDOCUMENTED
139 * Will sort thethree arrays (iarray, jarray, array) following the
140 * criteria :
141 *
142 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
143 * i.e.: C (row major) ordering
144 */
145
146
147 if(type == RSB_NUMERICAL_TYPE_DOUBLE )
148 return rsb_do_mergesort_double_BCSR(iarray, jarray,
149 mb,kb,array, length,
150 iresult, jresult,
151 result);
152 else
153 if(type == RSB_NUMERICAL_TYPE_FLOAT )
154 return rsb_do_mergesort_float_BCSR(iarray, jarray,
155 mb,kb,array, length,
156 iresult, jresult,
157 result);
158 else
159 if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
160 return rsb_do_mergesort_float_complex_BCSR(iarray, jarray,
161 mb,kb,array, length,
162 iresult, jresult,
163 result);
164 else
165 if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
166 return rsb_do_mergesort_double_complex_BCSR(iarray, jarray,
167 mb,kb,array, length,
168 iresult, jresult,
169 result);
170 else
171 return RSB_ERR_UNSUPPORTED_TYPE;
172 }
173
rsb__do_mergesort_VBR(rsb_coo_idx_t * iarray,rsb_coo_idx_t * jarray,rsb_coo_idx_t * biarray,rsb_coo_idx_t * bjarray,void * array,rsb_nnz_idx_t length,rsb_coo_idx_t * iresult,rsb_coo_idx_t * jresult,rsb_coo_idx_t * biresult,rsb_coo_idx_t * bjresult,void * result,rsb_type_t type)174 rsb_err_t rsb__do_mergesort_VBR(
175 rsb_coo_idx_t *iarray,
176 rsb_coo_idx_t *jarray,
177 rsb_coo_idx_t *biarray,
178 rsb_coo_idx_t *bjarray,
179 void *array,
180 rsb_nnz_idx_t length,
181 rsb_coo_idx_t *iresult,
182 rsb_coo_idx_t *jresult,
183 rsb_coo_idx_t *biresult,
184 rsb_coo_idx_t *bjresult,
185 void *result,
186 rsb_type_t type)
187 {
188 /*!
189 * \ingroup gr_util
190 * This function will sort the non zero elements of a sparse blocked matrix.
191 * It will read row and column indices arrays, the values array,
192 * and will sort them in separate output arrays.
193 *
194 * \param length the input arrays length
195 * \param iarray the input row indices array
196 * \param jarray the input column indices array
197 * \param array the input mtype array
198 * \param iresult the output row indices array
199 * \param jresult the output column indices array
200 * \param result the output values array
201 * \param biarray the input block row indices array
202 * \param bjarray the input block column indices array
203 * \param biresult the output block row indices array
204 * \param bjresult the output block column indices array
205 * Will sort thethree arrays (iarray, jarray, array) following the
206 * criteria :
207 *
208 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
209 * i.e.: C (row major) ordering
210 */
211
212
213 if(type == RSB_NUMERICAL_TYPE_DOUBLE )
214 return rsb_do_mergesort_double_VBR(iarray, jarray,
215 biarray,bjarray,array, length,
216 iresult, jresult,
217 biresult,bjresult,result);
218 else
219 if(type == RSB_NUMERICAL_TYPE_FLOAT )
220 return rsb_do_mergesort_float_VBR(iarray, jarray,
221 biarray,bjarray,array, length,
222 iresult, jresult,
223 biresult,bjresult,result);
224 else
225 if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
226 return rsb_do_mergesort_float_complex_VBR(iarray, jarray,
227 biarray,bjarray,array, length,
228 iresult, jresult,
229 biresult,bjresult,result);
230 else
231 if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
232 return rsb_do_mergesort_double_complex_VBR(iarray, jarray,
233 biarray,bjarray,array, length,
234 iresult, jresult,
235 biresult,bjresult,result);
236 else
237 return RSB_ERR_UNSUPPORTED_TYPE;
238 }
239
rsb_do_mergesort_double_CSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,double * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,double * restrict result)240 rsb_err_t rsb_do_mergesort_double_CSR(
241 rsb_coo_idx_t *restrict iarray,
242 rsb_coo_idx_t *restrict jarray,
243 double *array,
244 rsb_nnz_idx_t length,
245 rsb_coo_idx_t *restrict iresult,
246 rsb_coo_idx_t *restrict jresult,
247 double *restrict result)
248
249 {
250 /*!
251 * \ingroup gr_util
252 * This function will sort the non zero elements of a sparse blocked
253 * double matrix.
254 * It will read row and column indices arrays, the values array,
255 * and will sort them in separate output arrays.
256 *
257 * NOTE : This function could be optimized.
258 *
259 * \param iarray the input row indices array
260 * \param jarray the input column indices array
261 * \param array the input double array
262 * \param iresult the output row indices array
263 * \param jresult the output column indices array
264 * \param result the output values array
265 * Will sort thethree arrays (iarray, jarray, array) following the
266 * criteria :
267 *
268 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
269 * i.e.: C (row major) ordering
270 */
271
272 rsb_nnz_idx_t middle;
273 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
274
275 rsb_coo_idx_t * ileft ;
276 rsb_coo_idx_t * iright ;
277 rsb_coo_idx_t * jleft ;
278 rsb_coo_idx_t * jright ;
279 size_t tn=0;
280 double * left ;
281 double * right ;
282
283 #define LIMIT 1
284 if(length==LIMIT)
285 {
286 *iresult = *iarray;
287 *jresult = *jarray;
288 *(double*)result = *(double*)array;
289 }
290 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
291 #undef LIMIT
292 middle = length/2;
293
294 left = array;
295 right = array+middle;
296 ileft = iarray;
297 jleft = jarray;
298 iright = iarray+middle;
299 jright = jarray+middle;
300
301 /* 20121016 commented out omp usage because broke serial compilation */
302 {
303 rsb_do_mergesort_double_CSR
304 ( ileft, jleft,
305 left, middle,
306 iresult , jresult,
307 result );
308
309 if(tn==1)
310 rsb_do_mergesort_double_CSR
311 (iright, jright,
312 right, length-middle, iresult+middle ,jresult+middle,
313 ((double*)result)+middle );
314 }
315
316 RSB_MEMCPY(ileft ,iresult ,so*middle);
317 RSB_MEMCPY(jleft ,jresult ,so*middle);
318 RSB_MEMCPY( left, result ,sizeof(double)*middle);
319 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
320 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
321 RSB_MEMCPY( right, ((double*)result)+middle ,sizeof(double)*(length-middle));
322
323 rsb_do_merge_double_CSR (
324 ileft,iright,iresult,
325 jleft,jright,jresult,
326
327 left, right, result,
328 middle,length-middle
329 );
330 return RSB_ERR_NO_ERROR; /* ! */
331 }
332
333
334
rsb_do_merge_double_CSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const double * left,const double * restrict right,double * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)335 void rsb_do_merge_double_CSR(
336 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
337 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
338 const double* left, const double* restrict right, double* restrict result,
339 rsb_nnz_idx_t left_length,
340 rsb_nnz_idx_t right_length )
341
342
343 {
344 /*!
345 * \ingroup gr_util
346 * The merge function for our CSR matrix coefficients sorting.
347 *
348 * NOTE :This function is the mergesort bottleneck.
349 */
350 register int left_index=0, right_index=0, result_index=0;
351
352 /*
353 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
354 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
355 |<- length ----->|
356 ^- index
357 */
358
359
360 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
361 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
362 #define RESULT_ADVANCE result_index =( result_index+1);
363
364 #define RESULT_APPEND(IEL,JEL,EL) \
365 iresult[result_index]=(IEL); \
366 jresult[result_index]=(JEL); \
367 result[result_index]=( EL); \
368 RESULT_ADVANCE;
369
370 #define LRESULT_APPEND \
371 iresult[result_index]=ileft[left_index];\
372 jresult[result_index]=jleft[left_index];\
373 result[ result_index]= left[left_index];\
374 RESULT_ADVANCE; \
375 LEFT_ADVANCE;
376
377 #define RRESULT_APPEND \
378 iresult[result_index]=iright[right_index];\
379 jresult[result_index]=jright[right_index];\
380 result[result_index]= right[right_index];\
381 RESULT_ADVANCE; \
382 RIGHT_ADVANCE;
383
384 while( left_length > 0 && right_length > 0)
385 if(
386
387 ileft[left_index] < iright[right_index] ||
388 ( ileft[left_index] == iright[right_index] &&
389 jleft[left_index] <= jright[right_index] )
390 )
391 {
392 LRESULT_APPEND
393 }
394 else
395 {
396 RRESULT_APPEND
397 }
398
399 while( left_length > 0 )
400 {
401 LRESULT_APPEND
402 }
403 while( right_length > 0 )
404 {
405 RRESULT_APPEND
406 }
407 #undef LEFT_ADVANCE
408 #undef RIGHT_ADVANCE
409 #undef RESULT_ADVANCE
410 #undef RESULT_APPEND
411 #undef LRESULT_APPEND
412 #undef RRESULT_APPEND
413
414 }
415
rsb_do_mergesort_float_CSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,float * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,float * restrict result)416 rsb_err_t rsb_do_mergesort_float_CSR(
417 rsb_coo_idx_t *restrict iarray,
418 rsb_coo_idx_t *restrict jarray,
419 float *array,
420 rsb_nnz_idx_t length,
421 rsb_coo_idx_t *restrict iresult,
422 rsb_coo_idx_t *restrict jresult,
423 float *restrict result)
424
425 {
426 /*!
427 * \ingroup gr_util
428 * This function will sort the non zero elements of a sparse blocked
429 * float matrix.
430 * It will read row and column indices arrays, the values array,
431 * and will sort them in separate output arrays.
432 *
433 * NOTE : This function could be optimized.
434 *
435 * \param iarray the input row indices array
436 * \param jarray the input column indices array
437 * \param array the input float array
438 * \param iresult the output row indices array
439 * \param jresult the output column indices array
440 * \param result the output values array
441 * Will sort thethree arrays (iarray, jarray, array) following the
442 * criteria :
443 *
444 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
445 * i.e.: C (row major) ordering
446 */
447
448 rsb_nnz_idx_t middle;
449 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
450
451 rsb_coo_idx_t * ileft ;
452 rsb_coo_idx_t * iright ;
453 rsb_coo_idx_t * jleft ;
454 rsb_coo_idx_t * jright ;
455 size_t tn=0;
456 float * left ;
457 float * right ;
458
459 #define LIMIT 1
460 if(length==LIMIT)
461 {
462 *iresult = *iarray;
463 *jresult = *jarray;
464 *(float*)result = *(float*)array;
465 }
466 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
467 #undef LIMIT
468 middle = length/2;
469
470 left = array;
471 right = array+middle;
472 ileft = iarray;
473 jleft = jarray;
474 iright = iarray+middle;
475 jright = jarray+middle;
476
477 /* 20121016 commented out omp usage because broke serial compilation */
478 {
479 rsb_do_mergesort_float_CSR
480 ( ileft, jleft,
481 left, middle,
482 iresult , jresult,
483 result );
484
485 if(tn==1)
486 rsb_do_mergesort_float_CSR
487 (iright, jright,
488 right, length-middle, iresult+middle ,jresult+middle,
489 ((float*)result)+middle );
490 }
491
492 RSB_MEMCPY(ileft ,iresult ,so*middle);
493 RSB_MEMCPY(jleft ,jresult ,so*middle);
494 RSB_MEMCPY( left, result ,sizeof(float)*middle);
495 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
496 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
497 RSB_MEMCPY( right, ((float*)result)+middle ,sizeof(float)*(length-middle));
498
499 rsb_do_merge_float_CSR (
500 ileft,iright,iresult,
501 jleft,jright,jresult,
502
503 left, right, result,
504 middle,length-middle
505 );
506 return RSB_ERR_NO_ERROR; /* ! */
507 }
508
509
510
rsb_do_merge_float_CSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const float * left,const float * restrict right,float * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)511 void rsb_do_merge_float_CSR(
512 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
513 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
514 const float* left, const float* restrict right, float* restrict result,
515 rsb_nnz_idx_t left_length,
516 rsb_nnz_idx_t right_length )
517
518
519 {
520 /*!
521 * \ingroup gr_util
522 * The merge function for our CSR matrix coefficients sorting.
523 *
524 * NOTE :This function is the mergesort bottleneck.
525 */
526 register int left_index=0, right_index=0, result_index=0;
527
528 /*
529 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
530 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
531 |<- length ----->|
532 ^- index
533 */
534
535
536 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
537 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
538 #define RESULT_ADVANCE result_index =( result_index+1);
539
540 #define RESULT_APPEND(IEL,JEL,EL) \
541 iresult[result_index]=(IEL); \
542 jresult[result_index]=(JEL); \
543 result[result_index]=( EL); \
544 RESULT_ADVANCE;
545
546 #define LRESULT_APPEND \
547 iresult[result_index]=ileft[left_index];\
548 jresult[result_index]=jleft[left_index];\
549 result[ result_index]= left[left_index];\
550 RESULT_ADVANCE; \
551 LEFT_ADVANCE;
552
553 #define RRESULT_APPEND \
554 iresult[result_index]=iright[right_index];\
555 jresult[result_index]=jright[right_index];\
556 result[result_index]= right[right_index];\
557 RESULT_ADVANCE; \
558 RIGHT_ADVANCE;
559
560 while( left_length > 0 && right_length > 0)
561 if(
562
563 ileft[left_index] < iright[right_index] ||
564 ( ileft[left_index] == iright[right_index] &&
565 jleft[left_index] <= jright[right_index] )
566 )
567 {
568 LRESULT_APPEND
569 }
570 else
571 {
572 RRESULT_APPEND
573 }
574
575 while( left_length > 0 )
576 {
577 LRESULT_APPEND
578 }
579 while( right_length > 0 )
580 {
581 RRESULT_APPEND
582 }
583 #undef LEFT_ADVANCE
584 #undef RIGHT_ADVANCE
585 #undef RESULT_ADVANCE
586 #undef RESULT_APPEND
587 #undef LRESULT_APPEND
588 #undef RRESULT_APPEND
589
590 }
591
rsb_do_mergesort_float_complex_CSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,float complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,float complex * restrict result)592 rsb_err_t rsb_do_mergesort_float_complex_CSR(
593 rsb_coo_idx_t *restrict iarray,
594 rsb_coo_idx_t *restrict jarray,
595 float complex *array,
596 rsb_nnz_idx_t length,
597 rsb_coo_idx_t *restrict iresult,
598 rsb_coo_idx_t *restrict jresult,
599 float complex *restrict result)
600
601 {
602 /*!
603 * \ingroup gr_util
604 * This function will sort the non zero elements of a sparse blocked
605 * float complex matrix.
606 * It will read row and column indices arrays, the values array,
607 * and will sort them in separate output arrays.
608 *
609 * NOTE : This function could be optimized.
610 *
611 * \param iarray the input row indices array
612 * \param jarray the input column indices array
613 * \param array the input float complex array
614 * \param iresult the output row indices array
615 * \param jresult the output column indices array
616 * \param result the output values array
617 * Will sort thethree arrays (iarray, jarray, array) following the
618 * criteria :
619 *
620 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
621 * i.e.: C (row major) ordering
622 */
623
624 rsb_nnz_idx_t middle;
625 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
626
627 rsb_coo_idx_t * ileft ;
628 rsb_coo_idx_t * iright ;
629 rsb_coo_idx_t * jleft ;
630 rsb_coo_idx_t * jright ;
631 size_t tn=0;
632 float complex * left ;
633 float complex * right ;
634
635 #define LIMIT 1
636 if(length==LIMIT)
637 {
638 *iresult = *iarray;
639 *jresult = *jarray;
640 *(float complex*)result = *(float complex*)array;
641 }
642 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
643 #undef LIMIT
644 middle = length/2;
645
646 left = array;
647 right = array+middle;
648 ileft = iarray;
649 jleft = jarray;
650 iright = iarray+middle;
651 jright = jarray+middle;
652
653 /* 20121016 commented out omp usage because broke serial compilation */
654 {
655 rsb_do_mergesort_float_complex_CSR
656 ( ileft, jleft,
657 left, middle,
658 iresult , jresult,
659 result );
660
661 if(tn==1)
662 rsb_do_mergesort_float_complex_CSR
663 (iright, jright,
664 right, length-middle, iresult+middle ,jresult+middle,
665 ((float complex*)result)+middle );
666 }
667
668 RSB_MEMCPY(ileft ,iresult ,so*middle);
669 RSB_MEMCPY(jleft ,jresult ,so*middle);
670 RSB_MEMCPY( left, result ,sizeof(float complex)*middle);
671 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
672 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
673 RSB_MEMCPY( right, ((float complex*)result)+middle ,sizeof(float complex)*(length-middle));
674
675 rsb_do_merge_float_complex_CSR (
676 ileft,iright,iresult,
677 jleft,jright,jresult,
678
679 left, right, result,
680 middle,length-middle
681 );
682 return RSB_ERR_NO_ERROR; /* ! */
683 }
684
685
686
rsb_do_merge_float_complex_CSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const float complex * left,const float complex * restrict right,float complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)687 void rsb_do_merge_float_complex_CSR(
688 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
689 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
690 const float complex* left, const float complex* restrict right, float complex* restrict result,
691 rsb_nnz_idx_t left_length,
692 rsb_nnz_idx_t right_length )
693
694
695 {
696 /*!
697 * \ingroup gr_util
698 * The merge function for our CSR matrix coefficients sorting.
699 *
700 * NOTE :This function is the mergesort bottleneck.
701 */
702 register int left_index=0, right_index=0, result_index=0;
703
704 /*
705 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
706 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
707 |<- length ----->|
708 ^- index
709 */
710
711
712 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
713 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
714 #define RESULT_ADVANCE result_index =( result_index+1);
715
716 #define RESULT_APPEND(IEL,JEL,EL) \
717 iresult[result_index]=(IEL); \
718 jresult[result_index]=(JEL); \
719 result[result_index]=( EL); \
720 RESULT_ADVANCE;
721
722 #define LRESULT_APPEND \
723 iresult[result_index]=ileft[left_index];\
724 jresult[result_index]=jleft[left_index];\
725 result[ result_index]= left[left_index];\
726 RESULT_ADVANCE; \
727 LEFT_ADVANCE;
728
729 #define RRESULT_APPEND \
730 iresult[result_index]=iright[right_index];\
731 jresult[result_index]=jright[right_index];\
732 result[result_index]= right[right_index];\
733 RESULT_ADVANCE; \
734 RIGHT_ADVANCE;
735
736 while( left_length > 0 && right_length > 0)
737 if(
738
739 ileft[left_index] < iright[right_index] ||
740 ( ileft[left_index] == iright[right_index] &&
741 jleft[left_index] <= jright[right_index] )
742 )
743 {
744 LRESULT_APPEND
745 }
746 else
747 {
748 RRESULT_APPEND
749 }
750
751 while( left_length > 0 )
752 {
753 LRESULT_APPEND
754 }
755 while( right_length > 0 )
756 {
757 RRESULT_APPEND
758 }
759 #undef LEFT_ADVANCE
760 #undef RIGHT_ADVANCE
761 #undef RESULT_ADVANCE
762 #undef RESULT_APPEND
763 #undef LRESULT_APPEND
764 #undef RRESULT_APPEND
765
766 }
767
rsb_do_mergesort_double_complex_CSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,double complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,double complex * restrict result)768 rsb_err_t rsb_do_mergesort_double_complex_CSR(
769 rsb_coo_idx_t *restrict iarray,
770 rsb_coo_idx_t *restrict jarray,
771 double complex *array,
772 rsb_nnz_idx_t length,
773 rsb_coo_idx_t *restrict iresult,
774 rsb_coo_idx_t *restrict jresult,
775 double complex *restrict result)
776
777 {
778 /*!
779 * \ingroup gr_util
780 * This function will sort the non zero elements of a sparse blocked
781 * double complex matrix.
782 * It will read row and column indices arrays, the values array,
783 * and will sort them in separate output arrays.
784 *
785 * NOTE : This function could be optimized.
786 *
787 * \param iarray the input row indices array
788 * \param jarray the input column indices array
789 * \param array the input double complex array
790 * \param iresult the output row indices array
791 * \param jresult the output column indices array
792 * \param result the output values array
793 * Will sort thethree arrays (iarray, jarray, array) following the
794 * criteria :
795 *
796 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
797 * i.e.: C (row major) ordering
798 */
799
800 rsb_nnz_idx_t middle;
801 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
802
803 rsb_coo_idx_t * ileft ;
804 rsb_coo_idx_t * iright ;
805 rsb_coo_idx_t * jleft ;
806 rsb_coo_idx_t * jright ;
807 size_t tn=0;
808 double complex * left ;
809 double complex * right ;
810
811 #define LIMIT 1
812 if(length==LIMIT)
813 {
814 *iresult = *iarray;
815 *jresult = *jarray;
816 *(double complex*)result = *(double complex*)array;
817 }
818 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
819 #undef LIMIT
820 middle = length/2;
821
822 left = array;
823 right = array+middle;
824 ileft = iarray;
825 jleft = jarray;
826 iright = iarray+middle;
827 jright = jarray+middle;
828
829 /* 20121016 commented out omp usage because broke serial compilation */
830 {
831 rsb_do_mergesort_double_complex_CSR
832 ( ileft, jleft,
833 left, middle,
834 iresult , jresult,
835 result );
836
837 if(tn==1)
838 rsb_do_mergesort_double_complex_CSR
839 (iright, jright,
840 right, length-middle, iresult+middle ,jresult+middle,
841 ((double complex*)result)+middle );
842 }
843
844 RSB_MEMCPY(ileft ,iresult ,so*middle);
845 RSB_MEMCPY(jleft ,jresult ,so*middle);
846 RSB_MEMCPY( left, result ,sizeof(double complex)*middle);
847 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
848 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
849 RSB_MEMCPY( right, ((double complex*)result)+middle ,sizeof(double complex)*(length-middle));
850
851 rsb_do_merge_double_complex_CSR (
852 ileft,iright,iresult,
853 jleft,jright,jresult,
854
855 left, right, result,
856 middle,length-middle
857 );
858 return RSB_ERR_NO_ERROR; /* ! */
859 }
860
861
862
rsb_do_merge_double_complex_CSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const double complex * left,const double complex * restrict right,double complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)863 void rsb_do_merge_double_complex_CSR(
864 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
865 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
866 const double complex* left, const double complex* restrict right, double complex* restrict result,
867 rsb_nnz_idx_t left_length,
868 rsb_nnz_idx_t right_length )
869
870
871 {
872 /*!
873 * \ingroup gr_util
874 * The merge function for our CSR matrix coefficients sorting.
875 *
876 * NOTE :This function is the mergesort bottleneck.
877 */
878 register int left_index=0, right_index=0, result_index=0;
879
880 /*
881 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
882 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
883 |<- length ----->|
884 ^- index
885 */
886
887
888 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
889 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
890 #define RESULT_ADVANCE result_index =( result_index+1);
891
892 #define RESULT_APPEND(IEL,JEL,EL) \
893 iresult[result_index]=(IEL); \
894 jresult[result_index]=(JEL); \
895 result[result_index]=( EL); \
896 RESULT_ADVANCE;
897
898 #define LRESULT_APPEND \
899 iresult[result_index]=ileft[left_index];\
900 jresult[result_index]=jleft[left_index];\
901 result[ result_index]= left[left_index];\
902 RESULT_ADVANCE; \
903 LEFT_ADVANCE;
904
905 #define RRESULT_APPEND \
906 iresult[result_index]=iright[right_index];\
907 jresult[result_index]=jright[right_index];\
908 result[result_index]= right[right_index];\
909 RESULT_ADVANCE; \
910 RIGHT_ADVANCE;
911
912 while( left_length > 0 && right_length > 0)
913 if(
914
915 ileft[left_index] < iright[right_index] ||
916 ( ileft[left_index] == iright[right_index] &&
917 jleft[left_index] <= jright[right_index] )
918 )
919 {
920 LRESULT_APPEND
921 }
922 else
923 {
924 RRESULT_APPEND
925 }
926
927 while( left_length > 0 )
928 {
929 LRESULT_APPEND
930 }
931 while( right_length > 0 )
932 {
933 RRESULT_APPEND
934 }
935 #undef LEFT_ADVANCE
936 #undef RIGHT_ADVANCE
937 #undef RESULT_ADVANCE
938 #undef RESULT_APPEND
939 #undef LRESULT_APPEND
940 #undef RRESULT_APPEND
941
942 }
943
rsb_do_mergesort_double_BCSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t mb,rsb_coo_idx_t kb,double * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,double * restrict result)944 rsb_err_t rsb_do_mergesort_double_BCSR(
945 rsb_coo_idx_t *restrict iarray,
946 rsb_coo_idx_t *restrict jarray,
947 rsb_coo_idx_t mb, rsb_coo_idx_t kb,
948 double *array,
949 rsb_nnz_idx_t length,
950 rsb_coo_idx_t *restrict iresult,
951 rsb_coo_idx_t *restrict jresult,
952 double *restrict result)
953
954 {
955 /*!
956 * \ingroup gr_util
957 * This function will sort the non zero elements of a sparse blocked
958 * double matrix.
959 * It will read row and column indices arrays, the values array,
960 * and will sort them in separate output arrays.
961 *
962 * NOTE : This function could be optimized.
963 *
964 * \param iarray the input row indices array
965 * \param jarray the input column indices array
966 * \param array the input double array
967 * \param iresult the output row indices array
968 * \param jresult the output column indices array
969 * \param result the output values array
970 * Will sort thethree arrays (iarray, jarray, array) following the
971 * criteria :
972 *
973 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
974 * i.e.: C (row major) ordering
975 */
976
977 rsb_nnz_idx_t middle;
978 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
979
980 rsb_coo_idx_t * ileft ;
981 rsb_coo_idx_t * iright ;
982 rsb_coo_idx_t * jleft ;
983 rsb_coo_idx_t * jright ;
984 size_t tn=0;
985 double * left ;
986 double * right ;
987
988 #define LIMIT 1
989 if(length==LIMIT)
990 {
991 *iresult = *iarray;
992 *jresult = *jarray;
993 *(double*)result = *(double*)array;
994 }
995 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
996 #undef LIMIT
997 middle = length/2;
998
999 left = array;
1000 right = array+middle;
1001 ileft = iarray;
1002 jleft = jarray;
1003 iright = iarray+middle;
1004 jright = jarray+middle;
1005
1006 /* 20121016 commented out omp usage because broke serial compilation */
1007 {
1008 rsb_do_mergesort_double_BCSR
1009 ( ileft, jleft,
1010 mb, kb, left, middle,
1011 iresult , jresult,
1012 result );
1013
1014 if(tn==1)
1015 rsb_do_mergesort_double_BCSR
1016 (iright, jright,
1017 mb, kb, right, length-middle, iresult+middle ,jresult+middle,
1018 ((double*)result)+middle );
1019 }
1020
1021 RSB_MEMCPY(ileft ,iresult ,so*middle);
1022 RSB_MEMCPY(jleft ,jresult ,so*middle);
1023 RSB_MEMCPY( left, result ,sizeof(double)*middle);
1024 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1025 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1026 RSB_MEMCPY( right, ((double*)result)+middle ,sizeof(double)*(length-middle));
1027
1028 rsb_do_merge_double_BCSR (
1029 ileft,iright,iresult,
1030 jleft,jright,jresult,
1031 mb,kb,
1032 left, right, result,
1033 middle,length-middle
1034 );
1035 return RSB_ERR_NO_ERROR; /* ! */
1036 }
1037
1038
1039
rsb_do_merge_double_BCSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t mb,const rsb_coo_idx_t kb,const double * left,const double * restrict right,double * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1040 void rsb_do_merge_double_BCSR(
1041 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
1042 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
1043 const rsb_coo_idx_t mb, const rsb_coo_idx_t kb, const double* left, const double* restrict right, double* restrict result,
1044 rsb_nnz_idx_t left_length,
1045 rsb_nnz_idx_t right_length )
1046
1047
1048 {
1049 /*!
1050 * \ingroup gr_util
1051 * The merge function for our BCSR matrix coefficients sorting.
1052 *
1053 * NOTE :This function is the mergesort bottleneck.
1054 */
1055 register int left_index=0, right_index=0, result_index=0;
1056
1057 /*
1058 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1059 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1060 |<- length ----->|
1061 ^- index
1062 */
1063
1064
1065 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
1066 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
1067 #define RESULT_ADVANCE result_index =( result_index+1);
1068
1069 #define RESULT_APPEND(IEL,JEL,EL) \
1070 iresult[result_index]=(IEL); \
1071 jresult[result_index]=(JEL); \
1072 result[result_index]=( EL); \
1073 RESULT_ADVANCE;
1074
1075 #define LRESULT_APPEND \
1076 iresult[result_index]=ileft[left_index];\
1077 jresult[result_index]=jleft[left_index];\
1078 result[ result_index]= left[left_index];\
1079 RESULT_ADVANCE; \
1080 LEFT_ADVANCE;
1081
1082 #define RRESULT_APPEND \
1083 iresult[result_index]=iright[right_index];\
1084 jresult[result_index]=jright[right_index];\
1085 result[result_index]= right[right_index];\
1086 RESULT_ADVANCE; \
1087 RIGHT_ADVANCE;
1088
1089 while( left_length > 0 && right_length > 0)
1090 if(
1091
1092 ileft[left_index]/mb < iright[right_index]/mb ||
1093 ( ileft[left_index]/mb == iright[right_index]/mb &&
1094 jleft[left_index]/kb <= jright[right_index]/kb )
1095 )
1096 {
1097 LRESULT_APPEND
1098 }
1099 else
1100 {
1101 RRESULT_APPEND
1102 }
1103
1104 while( left_length > 0 )
1105 {
1106 LRESULT_APPEND
1107 }
1108 while( right_length > 0 )
1109 {
1110 RRESULT_APPEND
1111 }
1112 #undef LEFT_ADVANCE
1113 #undef RIGHT_ADVANCE
1114 #undef RESULT_ADVANCE
1115 #undef RESULT_APPEND
1116 #undef LRESULT_APPEND
1117 #undef RRESULT_APPEND
1118
1119 }
1120
rsb_do_mergesort_float_BCSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t mb,rsb_coo_idx_t kb,float * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,float * restrict result)1121 rsb_err_t rsb_do_mergesort_float_BCSR(
1122 rsb_coo_idx_t *restrict iarray,
1123 rsb_coo_idx_t *restrict jarray,
1124 rsb_coo_idx_t mb, rsb_coo_idx_t kb,
1125 float *array,
1126 rsb_nnz_idx_t length,
1127 rsb_coo_idx_t *restrict iresult,
1128 rsb_coo_idx_t *restrict jresult,
1129 float *restrict result)
1130
1131 {
1132 /*!
1133 * \ingroup gr_util
1134 * This function will sort the non zero elements of a sparse blocked
1135 * float matrix.
1136 * It will read row and column indices arrays, the values array,
1137 * and will sort them in separate output arrays.
1138 *
1139 * NOTE : This function could be optimized.
1140 *
1141 * \param iarray the input row indices array
1142 * \param jarray the input column indices array
1143 * \param array the input float array
1144 * \param iresult the output row indices array
1145 * \param jresult the output column indices array
1146 * \param result the output values array
1147 * Will sort thethree arrays (iarray, jarray, array) following the
1148 * criteria :
1149 *
1150 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1151 * i.e.: C (row major) ordering
1152 */
1153
1154 rsb_nnz_idx_t middle;
1155 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1156
1157 rsb_coo_idx_t * ileft ;
1158 rsb_coo_idx_t * iright ;
1159 rsb_coo_idx_t * jleft ;
1160 rsb_coo_idx_t * jright ;
1161 size_t tn=0;
1162 float * left ;
1163 float * right ;
1164
1165 #define LIMIT 1
1166 if(length==LIMIT)
1167 {
1168 *iresult = *iarray;
1169 *jresult = *jarray;
1170 *(float*)result = *(float*)array;
1171 }
1172 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1173 #undef LIMIT
1174 middle = length/2;
1175
1176 left = array;
1177 right = array+middle;
1178 ileft = iarray;
1179 jleft = jarray;
1180 iright = iarray+middle;
1181 jright = jarray+middle;
1182
1183 /* 20121016 commented out omp usage because broke serial compilation */
1184 {
1185 rsb_do_mergesort_float_BCSR
1186 ( ileft, jleft,
1187 mb, kb, left, middle,
1188 iresult , jresult,
1189 result );
1190
1191 if(tn==1)
1192 rsb_do_mergesort_float_BCSR
1193 (iright, jright,
1194 mb, kb, right, length-middle, iresult+middle ,jresult+middle,
1195 ((float*)result)+middle );
1196 }
1197
1198 RSB_MEMCPY(ileft ,iresult ,so*middle);
1199 RSB_MEMCPY(jleft ,jresult ,so*middle);
1200 RSB_MEMCPY( left, result ,sizeof(float)*middle);
1201 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1202 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1203 RSB_MEMCPY( right, ((float*)result)+middle ,sizeof(float)*(length-middle));
1204
1205 rsb_do_merge_float_BCSR (
1206 ileft,iright,iresult,
1207 jleft,jright,jresult,
1208 mb,kb,
1209 left, right, result,
1210 middle,length-middle
1211 );
1212 return RSB_ERR_NO_ERROR; /* ! */
1213 }
1214
1215
1216
rsb_do_merge_float_BCSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t mb,const rsb_coo_idx_t kb,const float * left,const float * restrict right,float * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1217 void rsb_do_merge_float_BCSR(
1218 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
1219 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
1220 const rsb_coo_idx_t mb, const rsb_coo_idx_t kb, const float* left, const float* restrict right, float* restrict result,
1221 rsb_nnz_idx_t left_length,
1222 rsb_nnz_idx_t right_length )
1223
1224
1225 {
1226 /*!
1227 * \ingroup gr_util
1228 * The merge function for our BCSR matrix coefficients sorting.
1229 *
1230 * NOTE :This function is the mergesort bottleneck.
1231 */
1232 register int left_index=0, right_index=0, result_index=0;
1233
1234 /*
1235 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1236 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1237 |<- length ----->|
1238 ^- index
1239 */
1240
1241
1242 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
1243 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
1244 #define RESULT_ADVANCE result_index =( result_index+1);
1245
1246 #define RESULT_APPEND(IEL,JEL,EL) \
1247 iresult[result_index]=(IEL); \
1248 jresult[result_index]=(JEL); \
1249 result[result_index]=( EL); \
1250 RESULT_ADVANCE;
1251
1252 #define LRESULT_APPEND \
1253 iresult[result_index]=ileft[left_index];\
1254 jresult[result_index]=jleft[left_index];\
1255 result[ result_index]= left[left_index];\
1256 RESULT_ADVANCE; \
1257 LEFT_ADVANCE;
1258
1259 #define RRESULT_APPEND \
1260 iresult[result_index]=iright[right_index];\
1261 jresult[result_index]=jright[right_index];\
1262 result[result_index]= right[right_index];\
1263 RESULT_ADVANCE; \
1264 RIGHT_ADVANCE;
1265
1266 while( left_length > 0 && right_length > 0)
1267 if(
1268
1269 ileft[left_index]/mb < iright[right_index]/mb ||
1270 ( ileft[left_index]/mb == iright[right_index]/mb &&
1271 jleft[left_index]/kb <= jright[right_index]/kb )
1272 )
1273 {
1274 LRESULT_APPEND
1275 }
1276 else
1277 {
1278 RRESULT_APPEND
1279 }
1280
1281 while( left_length > 0 )
1282 {
1283 LRESULT_APPEND
1284 }
1285 while( right_length > 0 )
1286 {
1287 RRESULT_APPEND
1288 }
1289 #undef LEFT_ADVANCE
1290 #undef RIGHT_ADVANCE
1291 #undef RESULT_ADVANCE
1292 #undef RESULT_APPEND
1293 #undef LRESULT_APPEND
1294 #undef RRESULT_APPEND
1295
1296 }
1297
rsb_do_mergesort_float_complex_BCSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t mb,rsb_coo_idx_t kb,float complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,float complex * restrict result)1298 rsb_err_t rsb_do_mergesort_float_complex_BCSR(
1299 rsb_coo_idx_t *restrict iarray,
1300 rsb_coo_idx_t *restrict jarray,
1301 rsb_coo_idx_t mb, rsb_coo_idx_t kb,
1302 float complex *array,
1303 rsb_nnz_idx_t length,
1304 rsb_coo_idx_t *restrict iresult,
1305 rsb_coo_idx_t *restrict jresult,
1306 float complex *restrict result)
1307
1308 {
1309 /*!
1310 * \ingroup gr_util
1311 * This function will sort the non zero elements of a sparse blocked
1312 * float complex matrix.
1313 * It will read row and column indices arrays, the values array,
1314 * and will sort them in separate output arrays.
1315 *
1316 * NOTE : This function could be optimized.
1317 *
1318 * \param iarray the input row indices array
1319 * \param jarray the input column indices array
1320 * \param array the input float complex array
1321 * \param iresult the output row indices array
1322 * \param jresult the output column indices array
1323 * \param result the output values array
1324 * Will sort thethree arrays (iarray, jarray, array) following the
1325 * criteria :
1326 *
1327 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1328 * i.e.: C (row major) ordering
1329 */
1330
1331 rsb_nnz_idx_t middle;
1332 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1333
1334 rsb_coo_idx_t * ileft ;
1335 rsb_coo_idx_t * iright ;
1336 rsb_coo_idx_t * jleft ;
1337 rsb_coo_idx_t * jright ;
1338 size_t tn=0;
1339 float complex * left ;
1340 float complex * right ;
1341
1342 #define LIMIT 1
1343 if(length==LIMIT)
1344 {
1345 *iresult = *iarray;
1346 *jresult = *jarray;
1347 *(float complex*)result = *(float complex*)array;
1348 }
1349 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1350 #undef LIMIT
1351 middle = length/2;
1352
1353 left = array;
1354 right = array+middle;
1355 ileft = iarray;
1356 jleft = jarray;
1357 iright = iarray+middle;
1358 jright = jarray+middle;
1359
1360 /* 20121016 commented out omp usage because broke serial compilation */
1361 {
1362 rsb_do_mergesort_float_complex_BCSR
1363 ( ileft, jleft,
1364 mb, kb, left, middle,
1365 iresult , jresult,
1366 result );
1367
1368 if(tn==1)
1369 rsb_do_mergesort_float_complex_BCSR
1370 (iright, jright,
1371 mb, kb, right, length-middle, iresult+middle ,jresult+middle,
1372 ((float complex*)result)+middle );
1373 }
1374
1375 RSB_MEMCPY(ileft ,iresult ,so*middle);
1376 RSB_MEMCPY(jleft ,jresult ,so*middle);
1377 RSB_MEMCPY( left, result ,sizeof(float complex)*middle);
1378 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1379 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1380 RSB_MEMCPY( right, ((float complex*)result)+middle ,sizeof(float complex)*(length-middle));
1381
1382 rsb_do_merge_float_complex_BCSR (
1383 ileft,iright,iresult,
1384 jleft,jright,jresult,
1385 mb,kb,
1386 left, right, result,
1387 middle,length-middle
1388 );
1389 return RSB_ERR_NO_ERROR; /* ! */
1390 }
1391
1392
1393
rsb_do_merge_float_complex_BCSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t mb,const rsb_coo_idx_t kb,const float complex * left,const float complex * restrict right,float complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1394 void rsb_do_merge_float_complex_BCSR(
1395 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
1396 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
1397 const rsb_coo_idx_t mb, const rsb_coo_idx_t kb, const float complex* left, const float complex* restrict right, float complex* restrict result,
1398 rsb_nnz_idx_t left_length,
1399 rsb_nnz_idx_t right_length )
1400
1401
1402 {
1403 /*!
1404 * \ingroup gr_util
1405 * The merge function for our BCSR matrix coefficients sorting.
1406 *
1407 * NOTE :This function is the mergesort bottleneck.
1408 */
1409 register int left_index=0, right_index=0, result_index=0;
1410
1411 /*
1412 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1413 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1414 |<- length ----->|
1415 ^- index
1416 */
1417
1418
1419 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
1420 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
1421 #define RESULT_ADVANCE result_index =( result_index+1);
1422
1423 #define RESULT_APPEND(IEL,JEL,EL) \
1424 iresult[result_index]=(IEL); \
1425 jresult[result_index]=(JEL); \
1426 result[result_index]=( EL); \
1427 RESULT_ADVANCE;
1428
1429 #define LRESULT_APPEND \
1430 iresult[result_index]=ileft[left_index];\
1431 jresult[result_index]=jleft[left_index];\
1432 result[ result_index]= left[left_index];\
1433 RESULT_ADVANCE; \
1434 LEFT_ADVANCE;
1435
1436 #define RRESULT_APPEND \
1437 iresult[result_index]=iright[right_index];\
1438 jresult[result_index]=jright[right_index];\
1439 result[result_index]= right[right_index];\
1440 RESULT_ADVANCE; \
1441 RIGHT_ADVANCE;
1442
1443 while( left_length > 0 && right_length > 0)
1444 if(
1445
1446 ileft[left_index]/mb < iright[right_index]/mb ||
1447 ( ileft[left_index]/mb == iright[right_index]/mb &&
1448 jleft[left_index]/kb <= jright[right_index]/kb )
1449 )
1450 {
1451 LRESULT_APPEND
1452 }
1453 else
1454 {
1455 RRESULT_APPEND
1456 }
1457
1458 while( left_length > 0 )
1459 {
1460 LRESULT_APPEND
1461 }
1462 while( right_length > 0 )
1463 {
1464 RRESULT_APPEND
1465 }
1466 #undef LEFT_ADVANCE
1467 #undef RIGHT_ADVANCE
1468 #undef RESULT_ADVANCE
1469 #undef RESULT_APPEND
1470 #undef LRESULT_APPEND
1471 #undef RRESULT_APPEND
1472
1473 }
1474
rsb_do_mergesort_double_complex_BCSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t mb,rsb_coo_idx_t kb,double complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,double complex * restrict result)1475 rsb_err_t rsb_do_mergesort_double_complex_BCSR(
1476 rsb_coo_idx_t *restrict iarray,
1477 rsb_coo_idx_t *restrict jarray,
1478 rsb_coo_idx_t mb, rsb_coo_idx_t kb,
1479 double complex *array,
1480 rsb_nnz_idx_t length,
1481 rsb_coo_idx_t *restrict iresult,
1482 rsb_coo_idx_t *restrict jresult,
1483 double complex *restrict result)
1484
1485 {
1486 /*!
1487 * \ingroup gr_util
1488 * This function will sort the non zero elements of a sparse blocked
1489 * double complex matrix.
1490 * It will read row and column indices arrays, the values array,
1491 * and will sort them in separate output arrays.
1492 *
1493 * NOTE : This function could be optimized.
1494 *
1495 * \param iarray the input row indices array
1496 * \param jarray the input column indices array
1497 * \param array the input double complex array
1498 * \param iresult the output row indices array
1499 * \param jresult the output column indices array
1500 * \param result the output values array
1501 * Will sort thethree arrays (iarray, jarray, array) following the
1502 * criteria :
1503 *
1504 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1505 * i.e.: C (row major) ordering
1506 */
1507
1508 rsb_nnz_idx_t middle;
1509 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1510
1511 rsb_coo_idx_t * ileft ;
1512 rsb_coo_idx_t * iright ;
1513 rsb_coo_idx_t * jleft ;
1514 rsb_coo_idx_t * jright ;
1515 size_t tn=0;
1516 double complex * left ;
1517 double complex * right ;
1518
1519 #define LIMIT 1
1520 if(length==LIMIT)
1521 {
1522 *iresult = *iarray;
1523 *jresult = *jarray;
1524 *(double complex*)result = *(double complex*)array;
1525 }
1526 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1527 #undef LIMIT
1528 middle = length/2;
1529
1530 left = array;
1531 right = array+middle;
1532 ileft = iarray;
1533 jleft = jarray;
1534 iright = iarray+middle;
1535 jright = jarray+middle;
1536
1537 /* 20121016 commented out omp usage because broke serial compilation */
1538 {
1539 rsb_do_mergesort_double_complex_BCSR
1540 ( ileft, jleft,
1541 mb, kb, left, middle,
1542 iresult , jresult,
1543 result );
1544
1545 if(tn==1)
1546 rsb_do_mergesort_double_complex_BCSR
1547 (iright, jright,
1548 mb, kb, right, length-middle, iresult+middle ,jresult+middle,
1549 ((double complex*)result)+middle );
1550 }
1551
1552 RSB_MEMCPY(ileft ,iresult ,so*middle);
1553 RSB_MEMCPY(jleft ,jresult ,so*middle);
1554 RSB_MEMCPY( left, result ,sizeof(double complex)*middle);
1555 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1556 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1557 RSB_MEMCPY( right, ((double complex*)result)+middle ,sizeof(double complex)*(length-middle));
1558
1559 rsb_do_merge_double_complex_BCSR (
1560 ileft,iright,iresult,
1561 jleft,jright,jresult,
1562 mb,kb,
1563 left, right, result,
1564 middle,length-middle
1565 );
1566 return RSB_ERR_NO_ERROR; /* ! */
1567 }
1568
1569
1570
rsb_do_merge_double_complex_BCSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t mb,const rsb_coo_idx_t kb,const double complex * left,const double complex * restrict right,double complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1571 void rsb_do_merge_double_complex_BCSR(
1572 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
1573 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
1574 const rsb_coo_idx_t mb, const rsb_coo_idx_t kb, const double complex* left, const double complex* restrict right, double complex* restrict result,
1575 rsb_nnz_idx_t left_length,
1576 rsb_nnz_idx_t right_length )
1577
1578
1579 {
1580 /*!
1581 * \ingroup gr_util
1582 * The merge function for our BCSR matrix coefficients sorting.
1583 *
1584 * NOTE :This function is the mergesort bottleneck.
1585 */
1586 register int left_index=0, right_index=0, result_index=0;
1587
1588 /*
1589 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1590 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1591 |<- length ----->|
1592 ^- index
1593 */
1594
1595
1596 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
1597 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
1598 #define RESULT_ADVANCE result_index =( result_index+1);
1599
1600 #define RESULT_APPEND(IEL,JEL,EL) \
1601 iresult[result_index]=(IEL); \
1602 jresult[result_index]=(JEL); \
1603 result[result_index]=( EL); \
1604 RESULT_ADVANCE;
1605
1606 #define LRESULT_APPEND \
1607 iresult[result_index]=ileft[left_index];\
1608 jresult[result_index]=jleft[left_index];\
1609 result[ result_index]= left[left_index];\
1610 RESULT_ADVANCE; \
1611 LEFT_ADVANCE;
1612
1613 #define RRESULT_APPEND \
1614 iresult[result_index]=iright[right_index];\
1615 jresult[result_index]=jright[right_index];\
1616 result[result_index]= right[right_index];\
1617 RESULT_ADVANCE; \
1618 RIGHT_ADVANCE;
1619
1620 while( left_length > 0 && right_length > 0)
1621 if(
1622
1623 ileft[left_index]/mb < iright[right_index]/mb ||
1624 ( ileft[left_index]/mb == iright[right_index]/mb &&
1625 jleft[left_index]/kb <= jright[right_index]/kb )
1626 )
1627 {
1628 LRESULT_APPEND
1629 }
1630 else
1631 {
1632 RRESULT_APPEND
1633 }
1634
1635 while( left_length > 0 )
1636 {
1637 LRESULT_APPEND
1638 }
1639 while( right_length > 0 )
1640 {
1641 RRESULT_APPEND
1642 }
1643 #undef LEFT_ADVANCE
1644 #undef RIGHT_ADVANCE
1645 #undef RESULT_ADVANCE
1646 #undef RESULT_APPEND
1647 #undef LRESULT_APPEND
1648 #undef RRESULT_APPEND
1649
1650 }
1651
rsb_do_mergesort_double_VBR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t * restrict biarray,rsb_coo_idx_t * restrict bjarray,double * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,rsb_coo_idx_t * restrict biresult,rsb_coo_idx_t * restrict bjresult,double * restrict result)1652 rsb_err_t rsb_do_mergesort_double_VBR(
1653 rsb_coo_idx_t *restrict iarray,
1654 rsb_coo_idx_t *restrict jarray,
1655 rsb_coo_idx_t *restrict biarray,
1656 rsb_coo_idx_t *restrict bjarray,
1657 double *array,
1658 rsb_nnz_idx_t length,
1659 rsb_coo_idx_t *restrict iresult,
1660 rsb_coo_idx_t *restrict jresult,
1661 rsb_coo_idx_t *restrict biresult,
1662 rsb_coo_idx_t *restrict bjresult,
1663 double *restrict result)
1664
1665 {
1666 /*!
1667 * \ingroup gr_util
1668 * This function will sort the non zero elements of a sparse blocked
1669 * double matrix.
1670 * It will read row and column indices arrays, the values array,
1671 * and will sort them in separate output arrays.
1672 *
1673 * NOTE : This function could be optimized.
1674 *
1675 * \param iarray the input row indices array
1676 * \param jarray the input column indices array
1677 * \param array the input double array
1678 * \param iresult the output row indices array
1679 * \param jresult the output column indices array
1680 * \param result the output values array
1681 * \param biarray the input block row indices array
1682 * \param bjarray the input block column indices array
1683 * \param biresult the output block row indices array
1684 * \param bjresult the output block column indices array
1685 * Will sort thethree arrays (iarray, jarray, array) following the
1686 * criteria :
1687 *
1688 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1689 * i.e.: C (row major) ordering
1690 */
1691
1692 rsb_nnz_idx_t middle;
1693 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1694
1695 rsb_coo_idx_t * ileft ;
1696 rsb_coo_idx_t * iright ;
1697
1698 rsb_coo_idx_t * bileft ;
1699 rsb_coo_idx_t * biright ;
1700 rsb_coo_idx_t * jleft ;
1701 rsb_coo_idx_t * jright ;
1702
1703 rsb_coo_idx_t * bjleft ;
1704 rsb_coo_idx_t * bjright ;
1705 size_t tn=0;
1706 double * left ;
1707 double * right ;
1708
1709 #define LIMIT 1
1710 if(length==LIMIT)
1711 {
1712 *iresult = *iarray;
1713 *jresult = *jarray;
1714
1715 *biresult = *biarray;
1716 *bjresult = *bjarray;
1717 *(double*)result = *(double*)array;
1718 }
1719 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1720 #undef LIMIT
1721 middle = length/2;
1722
1723
1724 bileft = biarray;
1725 bjleft = bjarray;
1726 biright = biarray+middle;
1727 bjright = bjarray+middle;
1728 left = array;
1729 right = array+middle;
1730 ileft = iarray;
1731 jleft = jarray;
1732 iright = iarray+middle;
1733 jright = jarray+middle;
1734
1735 /* 20121016 commented out omp usage because broke serial compilation */
1736 {
1737 rsb_do_mergesort_double_VBR
1738 ( ileft, jleft,
1739 bileft, bjleft,
1740 left, middle,
1741 iresult , jresult,
1742 biresult, bjresult,
1743 result );
1744
1745 if(tn==1)
1746 rsb_do_mergesort_double_VBR
1747 (iright, jright,
1748 biright, bjright,
1749 right, length-middle, iresult+middle ,jresult+middle,
1750 biresult+middle, bjresult+middle,
1751 ((double*)result)+middle );
1752 }
1753
1754 RSB_MEMCPY(ileft ,iresult ,so*middle);
1755 RSB_MEMCPY(jleft ,jresult ,so*middle);
1756
1757 RSB_MEMCPY(bileft ,biresult ,so*middle);
1758 RSB_MEMCPY(bjleft ,bjresult ,so*middle);
1759 RSB_MEMCPY( left, result ,sizeof(double)*middle);
1760 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1761 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1762
1763 RSB_MEMCPY(biright ,biresult+middle ,so*(length-middle));
1764 RSB_MEMCPY(bjright ,bjresult+middle ,so*(length-middle));
1765 RSB_MEMCPY( right, ((double*)result)+middle ,sizeof(double)*(length-middle));
1766
1767 rsb_do_merge_double_VBR (
1768 ileft,iright,iresult,
1769 jleft,jright,jresult,
1770
1771 bileft, biright, biresult,
1772 bjleft, bjright, bjresult,
1773 left, right, result,
1774 middle,length-middle
1775 );
1776 return RSB_ERR_NO_ERROR; /* ! */
1777 }
1778
1779
1780
rsb_do_merge_double_VBR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t * restrict bileft,const rsb_coo_idx_t * restrict biright,rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft,const rsb_coo_idx_t * restrict bjright,rsb_coo_idx_t * restrict bjresult,const double * left,const double * restrict right,double * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1781 void rsb_do_merge_double_VBR(
1782 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
1783 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
1784 const rsb_coo_idx_t * restrict bileft, const rsb_coo_idx_t * restrict biright, rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft, const rsb_coo_idx_t * restrict bjright, rsb_coo_idx_t * restrict bjresult, const double* left, const double* restrict right, double* restrict result,
1785 rsb_nnz_idx_t left_length,
1786 rsb_nnz_idx_t right_length )
1787
1788
1789 {
1790 /*!
1791 * \ingroup gr_util
1792 * The merge function for our VBR matrix coefficients sorting.
1793 *
1794 * NOTE :This function is the mergesort bottleneck.
1795 */
1796 register int left_index=0, right_index=0, result_index=0;
1797
1798 /*
1799 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1800 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1801 |<- length ----->|
1802 ^- index
1803 */
1804
1805
1806 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
1807 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
1808 #define RESULT_ADVANCE result_index =( result_index+1);
1809
1810 #define RESULT_APPEND(IEL,JEL,BIEL,BJEL,EL) \
1811 iresult[result_index]=(IEL); \
1812 jresult[result_index]=(JEL); \
1813 result[result_index]=( EL); \
1814 biresult[result_index]=(BIEL); \
1815 bjresult[result_index]=(BJEL); \
1816 RESULT_ADVANCE;
1817
1818 #define LRESULT_APPEND \
1819 iresult[result_index]=ileft[left_index];\
1820 jresult[result_index]=jleft[left_index];\
1821 biresult[result_index]=bileft[left_index]; \
1822 bjresult[result_index]=bjleft[left_index]; \
1823 result[ result_index]= left[left_index];\
1824 RESULT_ADVANCE; \
1825 LEFT_ADVANCE;
1826
1827 #define RRESULT_APPEND \
1828 iresult[result_index]=iright[right_index];\
1829 jresult[result_index]=jright[right_index];\
1830 biresult[result_index]=biright[right_index]; \
1831 bjresult[result_index]=bjright[right_index]; \
1832 result[result_index]= right[right_index];\
1833 RESULT_ADVANCE; \
1834 RIGHT_ADVANCE;
1835
1836 while( left_length > 0 && right_length > 0)
1837 if(
1838
1839 bileft[left_index] < biright[right_index] ||
1840 ( bileft[left_index] == biright[right_index] &&
1841 bjleft[left_index] <= bjright[right_index] )
1842 )
1843 {
1844 LRESULT_APPEND
1845 }
1846 else
1847 {
1848 RRESULT_APPEND
1849 }
1850
1851 while( left_length > 0 )
1852 {
1853 LRESULT_APPEND
1854 }
1855 while( right_length > 0 )
1856 {
1857 RRESULT_APPEND
1858 }
1859 #undef LEFT_ADVANCE
1860 #undef RIGHT_ADVANCE
1861 #undef RESULT_ADVANCE
1862 #undef RESULT_APPEND
1863 #undef LRESULT_APPEND
1864 #undef RRESULT_APPEND
1865
1866 }
1867
rsb_do_mergesort_float_VBR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t * restrict biarray,rsb_coo_idx_t * restrict bjarray,float * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,rsb_coo_idx_t * restrict biresult,rsb_coo_idx_t * restrict bjresult,float * restrict result)1868 rsb_err_t rsb_do_mergesort_float_VBR(
1869 rsb_coo_idx_t *restrict iarray,
1870 rsb_coo_idx_t *restrict jarray,
1871 rsb_coo_idx_t *restrict biarray,
1872 rsb_coo_idx_t *restrict bjarray,
1873 float *array,
1874 rsb_nnz_idx_t length,
1875 rsb_coo_idx_t *restrict iresult,
1876 rsb_coo_idx_t *restrict jresult,
1877 rsb_coo_idx_t *restrict biresult,
1878 rsb_coo_idx_t *restrict bjresult,
1879 float *restrict result)
1880
1881 {
1882 /*!
1883 * \ingroup gr_util
1884 * This function will sort the non zero elements of a sparse blocked
1885 * float matrix.
1886 * It will read row and column indices arrays, the values array,
1887 * and will sort them in separate output arrays.
1888 *
1889 * NOTE : This function could be optimized.
1890 *
1891 * \param iarray the input row indices array
1892 * \param jarray the input column indices array
1893 * \param array the input float array
1894 * \param iresult the output row indices array
1895 * \param jresult the output column indices array
1896 * \param result the output values array
1897 * \param biarray the input block row indices array
1898 * \param bjarray the input block column indices array
1899 * \param biresult the output block row indices array
1900 * \param bjresult the output block column indices array
1901 * Will sort thethree arrays (iarray, jarray, array) following the
1902 * criteria :
1903 *
1904 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1905 * i.e.: C (row major) ordering
1906 */
1907
1908 rsb_nnz_idx_t middle;
1909 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1910
1911 rsb_coo_idx_t * ileft ;
1912 rsb_coo_idx_t * iright ;
1913
1914 rsb_coo_idx_t * bileft ;
1915 rsb_coo_idx_t * biright ;
1916 rsb_coo_idx_t * jleft ;
1917 rsb_coo_idx_t * jright ;
1918
1919 rsb_coo_idx_t * bjleft ;
1920 rsb_coo_idx_t * bjright ;
1921 size_t tn=0;
1922 float * left ;
1923 float * right ;
1924
1925 #define LIMIT 1
1926 if(length==LIMIT)
1927 {
1928 *iresult = *iarray;
1929 *jresult = *jarray;
1930
1931 *biresult = *biarray;
1932 *bjresult = *bjarray;
1933 *(float*)result = *(float*)array;
1934 }
1935 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1936 #undef LIMIT
1937 middle = length/2;
1938
1939
1940 bileft = biarray;
1941 bjleft = bjarray;
1942 biright = biarray+middle;
1943 bjright = bjarray+middle;
1944 left = array;
1945 right = array+middle;
1946 ileft = iarray;
1947 jleft = jarray;
1948 iright = iarray+middle;
1949 jright = jarray+middle;
1950
1951 /* 20121016 commented out omp usage because broke serial compilation */
1952 {
1953 rsb_do_mergesort_float_VBR
1954 ( ileft, jleft,
1955 bileft, bjleft,
1956 left, middle,
1957 iresult , jresult,
1958 biresult, bjresult,
1959 result );
1960
1961 if(tn==1)
1962 rsb_do_mergesort_float_VBR
1963 (iright, jright,
1964 biright, bjright,
1965 right, length-middle, iresult+middle ,jresult+middle,
1966 biresult+middle, bjresult+middle,
1967 ((float*)result)+middle );
1968 }
1969
1970 RSB_MEMCPY(ileft ,iresult ,so*middle);
1971 RSB_MEMCPY(jleft ,jresult ,so*middle);
1972
1973 RSB_MEMCPY(bileft ,biresult ,so*middle);
1974 RSB_MEMCPY(bjleft ,bjresult ,so*middle);
1975 RSB_MEMCPY( left, result ,sizeof(float)*middle);
1976 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1977 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1978
1979 RSB_MEMCPY(biright ,biresult+middle ,so*(length-middle));
1980 RSB_MEMCPY(bjright ,bjresult+middle ,so*(length-middle));
1981 RSB_MEMCPY( right, ((float*)result)+middle ,sizeof(float)*(length-middle));
1982
1983 rsb_do_merge_float_VBR (
1984 ileft,iright,iresult,
1985 jleft,jright,jresult,
1986
1987 bileft, biright, biresult,
1988 bjleft, bjright, bjresult,
1989 left, right, result,
1990 middle,length-middle
1991 );
1992 return RSB_ERR_NO_ERROR; /* ! */
1993 }
1994
1995
1996
rsb_do_merge_float_VBR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t * restrict bileft,const rsb_coo_idx_t * restrict biright,rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft,const rsb_coo_idx_t * restrict bjright,rsb_coo_idx_t * restrict bjresult,const float * left,const float * restrict right,float * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1997 void rsb_do_merge_float_VBR(
1998 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
1999 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
2000 const rsb_coo_idx_t * restrict bileft, const rsb_coo_idx_t * restrict biright, rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft, const rsb_coo_idx_t * restrict bjright, rsb_coo_idx_t * restrict bjresult, const float* left, const float* restrict right, float* restrict result,
2001 rsb_nnz_idx_t left_length,
2002 rsb_nnz_idx_t right_length )
2003
2004
2005 {
2006 /*!
2007 * \ingroup gr_util
2008 * The merge function for our VBR matrix coefficients sorting.
2009 *
2010 * NOTE :This function is the mergesort bottleneck.
2011 */
2012 register int left_index=0, right_index=0, result_index=0;
2013
2014 /*
2015 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2016 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2017 |<- length ----->|
2018 ^- index
2019 */
2020
2021
2022 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
2023 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
2024 #define RESULT_ADVANCE result_index =( result_index+1);
2025
2026 #define RESULT_APPEND(IEL,JEL,BIEL,BJEL,EL) \
2027 iresult[result_index]=(IEL); \
2028 jresult[result_index]=(JEL); \
2029 result[result_index]=( EL); \
2030 biresult[result_index]=(BIEL); \
2031 bjresult[result_index]=(BJEL); \
2032 RESULT_ADVANCE;
2033
2034 #define LRESULT_APPEND \
2035 iresult[result_index]=ileft[left_index];\
2036 jresult[result_index]=jleft[left_index];\
2037 biresult[result_index]=bileft[left_index]; \
2038 bjresult[result_index]=bjleft[left_index]; \
2039 result[ result_index]= left[left_index];\
2040 RESULT_ADVANCE; \
2041 LEFT_ADVANCE;
2042
2043 #define RRESULT_APPEND \
2044 iresult[result_index]=iright[right_index];\
2045 jresult[result_index]=jright[right_index];\
2046 biresult[result_index]=biright[right_index]; \
2047 bjresult[result_index]=bjright[right_index]; \
2048 result[result_index]= right[right_index];\
2049 RESULT_ADVANCE; \
2050 RIGHT_ADVANCE;
2051
2052 while( left_length > 0 && right_length > 0)
2053 if(
2054
2055 bileft[left_index] < biright[right_index] ||
2056 ( bileft[left_index] == biright[right_index] &&
2057 bjleft[left_index] <= bjright[right_index] )
2058 )
2059 {
2060 LRESULT_APPEND
2061 }
2062 else
2063 {
2064 RRESULT_APPEND
2065 }
2066
2067 while( left_length > 0 )
2068 {
2069 LRESULT_APPEND
2070 }
2071 while( right_length > 0 )
2072 {
2073 RRESULT_APPEND
2074 }
2075 #undef LEFT_ADVANCE
2076 #undef RIGHT_ADVANCE
2077 #undef RESULT_ADVANCE
2078 #undef RESULT_APPEND
2079 #undef LRESULT_APPEND
2080 #undef RRESULT_APPEND
2081
2082 }
2083
rsb_do_mergesort_float_complex_VBR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t * restrict biarray,rsb_coo_idx_t * restrict bjarray,float complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,rsb_coo_idx_t * restrict biresult,rsb_coo_idx_t * restrict bjresult,float complex * restrict result)2084 rsb_err_t rsb_do_mergesort_float_complex_VBR(
2085 rsb_coo_idx_t *restrict iarray,
2086 rsb_coo_idx_t *restrict jarray,
2087 rsb_coo_idx_t *restrict biarray,
2088 rsb_coo_idx_t *restrict bjarray,
2089 float complex *array,
2090 rsb_nnz_idx_t length,
2091 rsb_coo_idx_t *restrict iresult,
2092 rsb_coo_idx_t *restrict jresult,
2093 rsb_coo_idx_t *restrict biresult,
2094 rsb_coo_idx_t *restrict bjresult,
2095 float complex *restrict result)
2096
2097 {
2098 /*!
2099 * \ingroup gr_util
2100 * This function will sort the non zero elements of a sparse blocked
2101 * float complex matrix.
2102 * It will read row and column indices arrays, the values array,
2103 * and will sort them in separate output arrays.
2104 *
2105 * NOTE : This function could be optimized.
2106 *
2107 * \param iarray the input row indices array
2108 * \param jarray the input column indices array
2109 * \param array the input float complex array
2110 * \param iresult the output row indices array
2111 * \param jresult the output column indices array
2112 * \param result the output values array
2113 * \param biarray the input block row indices array
2114 * \param bjarray the input block column indices array
2115 * \param biresult the output block row indices array
2116 * \param bjresult the output block column indices array
2117 * Will sort thethree arrays (iarray, jarray, array) following the
2118 * criteria :
2119 *
2120 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
2121 * i.e.: C (row major) ordering
2122 */
2123
2124 rsb_nnz_idx_t middle;
2125 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
2126
2127 rsb_coo_idx_t * ileft ;
2128 rsb_coo_idx_t * iright ;
2129
2130 rsb_coo_idx_t * bileft ;
2131 rsb_coo_idx_t * biright ;
2132 rsb_coo_idx_t * jleft ;
2133 rsb_coo_idx_t * jright ;
2134
2135 rsb_coo_idx_t * bjleft ;
2136 rsb_coo_idx_t * bjright ;
2137 size_t tn=0;
2138 float complex * left ;
2139 float complex * right ;
2140
2141 #define LIMIT 1
2142 if(length==LIMIT)
2143 {
2144 *iresult = *iarray;
2145 *jresult = *jarray;
2146
2147 *biresult = *biarray;
2148 *bjresult = *bjarray;
2149 *(float complex*)result = *(float complex*)array;
2150 }
2151 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
2152 #undef LIMIT
2153 middle = length/2;
2154
2155
2156 bileft = biarray;
2157 bjleft = bjarray;
2158 biright = biarray+middle;
2159 bjright = bjarray+middle;
2160 left = array;
2161 right = array+middle;
2162 ileft = iarray;
2163 jleft = jarray;
2164 iright = iarray+middle;
2165 jright = jarray+middle;
2166
2167 /* 20121016 commented out omp usage because broke serial compilation */
2168 {
2169 rsb_do_mergesort_float_complex_VBR
2170 ( ileft, jleft,
2171 bileft, bjleft,
2172 left, middle,
2173 iresult , jresult,
2174 biresult, bjresult,
2175 result );
2176
2177 if(tn==1)
2178 rsb_do_mergesort_float_complex_VBR
2179 (iright, jright,
2180 biright, bjright,
2181 right, length-middle, iresult+middle ,jresult+middle,
2182 biresult+middle, bjresult+middle,
2183 ((float complex*)result)+middle );
2184 }
2185
2186 RSB_MEMCPY(ileft ,iresult ,so*middle);
2187 RSB_MEMCPY(jleft ,jresult ,so*middle);
2188
2189 RSB_MEMCPY(bileft ,biresult ,so*middle);
2190 RSB_MEMCPY(bjleft ,bjresult ,so*middle);
2191 RSB_MEMCPY( left, result ,sizeof(float complex)*middle);
2192 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
2193 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
2194
2195 RSB_MEMCPY(biright ,biresult+middle ,so*(length-middle));
2196 RSB_MEMCPY(bjright ,bjresult+middle ,so*(length-middle));
2197 RSB_MEMCPY( right, ((float complex*)result)+middle ,sizeof(float complex)*(length-middle));
2198
2199 rsb_do_merge_float_complex_VBR (
2200 ileft,iright,iresult,
2201 jleft,jright,jresult,
2202
2203 bileft, biright, biresult,
2204 bjleft, bjright, bjresult,
2205 left, right, result,
2206 middle,length-middle
2207 );
2208 return RSB_ERR_NO_ERROR; /* ! */
2209 }
2210
2211
2212
rsb_do_merge_float_complex_VBR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t * restrict bileft,const rsb_coo_idx_t * restrict biright,rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft,const rsb_coo_idx_t * restrict bjright,rsb_coo_idx_t * restrict bjresult,const float complex * left,const float complex * restrict right,float complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)2213 void rsb_do_merge_float_complex_VBR(
2214 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
2215 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
2216 const rsb_coo_idx_t * restrict bileft, const rsb_coo_idx_t * restrict biright, rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft, const rsb_coo_idx_t * restrict bjright, rsb_coo_idx_t * restrict bjresult, const float complex* left, const float complex* restrict right, float complex* restrict result,
2217 rsb_nnz_idx_t left_length,
2218 rsb_nnz_idx_t right_length )
2219
2220
2221 {
2222 /*!
2223 * \ingroup gr_util
2224 * The merge function for our VBR matrix coefficients sorting.
2225 *
2226 * NOTE :This function is the mergesort bottleneck.
2227 */
2228 register int left_index=0, right_index=0, result_index=0;
2229
2230 /*
2231 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2232 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2233 |<- length ----->|
2234 ^- index
2235 */
2236
2237
2238 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
2239 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
2240 #define RESULT_ADVANCE result_index =( result_index+1);
2241
2242 #define RESULT_APPEND(IEL,JEL,BIEL,BJEL,EL) \
2243 iresult[result_index]=(IEL); \
2244 jresult[result_index]=(JEL); \
2245 result[result_index]=( EL); \
2246 biresult[result_index]=(BIEL); \
2247 bjresult[result_index]=(BJEL); \
2248 RESULT_ADVANCE;
2249
2250 #define LRESULT_APPEND \
2251 iresult[result_index]=ileft[left_index];\
2252 jresult[result_index]=jleft[left_index];\
2253 biresult[result_index]=bileft[left_index]; \
2254 bjresult[result_index]=bjleft[left_index]; \
2255 result[ result_index]= left[left_index];\
2256 RESULT_ADVANCE; \
2257 LEFT_ADVANCE;
2258
2259 #define RRESULT_APPEND \
2260 iresult[result_index]=iright[right_index];\
2261 jresult[result_index]=jright[right_index];\
2262 biresult[result_index]=biright[right_index]; \
2263 bjresult[result_index]=bjright[right_index]; \
2264 result[result_index]= right[right_index];\
2265 RESULT_ADVANCE; \
2266 RIGHT_ADVANCE;
2267
2268 while( left_length > 0 && right_length > 0)
2269 if(
2270
2271 bileft[left_index] < biright[right_index] ||
2272 ( bileft[left_index] == biright[right_index] &&
2273 bjleft[left_index] <= bjright[right_index] )
2274 )
2275 {
2276 LRESULT_APPEND
2277 }
2278 else
2279 {
2280 RRESULT_APPEND
2281 }
2282
2283 while( left_length > 0 )
2284 {
2285 LRESULT_APPEND
2286 }
2287 while( right_length > 0 )
2288 {
2289 RRESULT_APPEND
2290 }
2291 #undef LEFT_ADVANCE
2292 #undef RIGHT_ADVANCE
2293 #undef RESULT_ADVANCE
2294 #undef RESULT_APPEND
2295 #undef LRESULT_APPEND
2296 #undef RRESULT_APPEND
2297
2298 }
2299
rsb_do_mergesort_double_complex_VBR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t * restrict biarray,rsb_coo_idx_t * restrict bjarray,double complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,rsb_coo_idx_t * restrict biresult,rsb_coo_idx_t * restrict bjresult,double complex * restrict result)2300 rsb_err_t rsb_do_mergesort_double_complex_VBR(
2301 rsb_coo_idx_t *restrict iarray,
2302 rsb_coo_idx_t *restrict jarray,
2303 rsb_coo_idx_t *restrict biarray,
2304 rsb_coo_idx_t *restrict bjarray,
2305 double complex *array,
2306 rsb_nnz_idx_t length,
2307 rsb_coo_idx_t *restrict iresult,
2308 rsb_coo_idx_t *restrict jresult,
2309 rsb_coo_idx_t *restrict biresult,
2310 rsb_coo_idx_t *restrict bjresult,
2311 double complex *restrict result)
2312
2313 {
2314 /*!
2315 * \ingroup gr_util
2316 * This function will sort the non zero elements of a sparse blocked
2317 * double complex matrix.
2318 * It will read row and column indices arrays, the values array,
2319 * and will sort them in separate output arrays.
2320 *
2321 * NOTE : This function could be optimized.
2322 *
2323 * \param iarray the input row indices array
2324 * \param jarray the input column indices array
2325 * \param array the input double complex array
2326 * \param iresult the output row indices array
2327 * \param jresult the output column indices array
2328 * \param result the output values array
2329 * \param biarray the input block row indices array
2330 * \param bjarray the input block column indices array
2331 * \param biresult the output block row indices array
2332 * \param bjresult the output block column indices array
2333 * Will sort thethree arrays (iarray, jarray, array) following the
2334 * criteria :
2335 *
2336 * (ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
2337 * i.e.: C (row major) ordering
2338 */
2339
2340 rsb_nnz_idx_t middle;
2341 rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
2342
2343 rsb_coo_idx_t * ileft ;
2344 rsb_coo_idx_t * iright ;
2345
2346 rsb_coo_idx_t * bileft ;
2347 rsb_coo_idx_t * biright ;
2348 rsb_coo_idx_t * jleft ;
2349 rsb_coo_idx_t * jright ;
2350
2351 rsb_coo_idx_t * bjleft ;
2352 rsb_coo_idx_t * bjright ;
2353 size_t tn=0;
2354 double complex * left ;
2355 double complex * right ;
2356
2357 #define LIMIT 1
2358 if(length==LIMIT)
2359 {
2360 *iresult = *iarray;
2361 *jresult = *jarray;
2362
2363 *biresult = *biarray;
2364 *bjresult = *bjarray;
2365 *(double complex*)result = *(double complex*)array;
2366 }
2367 if(length<=LIMIT) return RSB_ERR_NO_ERROR;
2368 #undef LIMIT
2369 middle = length/2;
2370
2371
2372 bileft = biarray;
2373 bjleft = bjarray;
2374 biright = biarray+middle;
2375 bjright = bjarray+middle;
2376 left = array;
2377 right = array+middle;
2378 ileft = iarray;
2379 jleft = jarray;
2380 iright = iarray+middle;
2381 jright = jarray+middle;
2382
2383 /* 20121016 commented out omp usage because broke serial compilation */
2384 {
2385 rsb_do_mergesort_double_complex_VBR
2386 ( ileft, jleft,
2387 bileft, bjleft,
2388 left, middle,
2389 iresult , jresult,
2390 biresult, bjresult,
2391 result );
2392
2393 if(tn==1)
2394 rsb_do_mergesort_double_complex_VBR
2395 (iright, jright,
2396 biright, bjright,
2397 right, length-middle, iresult+middle ,jresult+middle,
2398 biresult+middle, bjresult+middle,
2399 ((double complex*)result)+middle );
2400 }
2401
2402 RSB_MEMCPY(ileft ,iresult ,so*middle);
2403 RSB_MEMCPY(jleft ,jresult ,so*middle);
2404
2405 RSB_MEMCPY(bileft ,biresult ,so*middle);
2406 RSB_MEMCPY(bjleft ,bjresult ,so*middle);
2407 RSB_MEMCPY( left, result ,sizeof(double complex)*middle);
2408 RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
2409 RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
2410
2411 RSB_MEMCPY(biright ,biresult+middle ,so*(length-middle));
2412 RSB_MEMCPY(bjright ,bjresult+middle ,so*(length-middle));
2413 RSB_MEMCPY( right, ((double complex*)result)+middle ,sizeof(double complex)*(length-middle));
2414
2415 rsb_do_merge_double_complex_VBR (
2416 ileft,iright,iresult,
2417 jleft,jright,jresult,
2418
2419 bileft, biright, biresult,
2420 bjleft, bjright, bjresult,
2421 left, right, result,
2422 middle,length-middle
2423 );
2424 return RSB_ERR_NO_ERROR; /* ! */
2425 }
2426
2427
2428
rsb_do_merge_double_complex_VBR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t * restrict bileft,const rsb_coo_idx_t * restrict biright,rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft,const rsb_coo_idx_t * restrict bjright,rsb_coo_idx_t * restrict bjresult,const double complex * left,const double complex * restrict right,double complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)2429 void rsb_do_merge_double_complex_VBR(
2430 const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright, rsb_coo_idx_t*restrict iresult,
2431 const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright, rsb_coo_idx_t*restrict jresult,
2432 const rsb_coo_idx_t * restrict bileft, const rsb_coo_idx_t * restrict biright, rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft, const rsb_coo_idx_t * restrict bjright, rsb_coo_idx_t * restrict bjresult, const double complex* left, const double complex* restrict right, double complex* restrict result,
2433 rsb_nnz_idx_t left_length,
2434 rsb_nnz_idx_t right_length )
2435
2436
2437 {
2438 /*!
2439 * \ingroup gr_util
2440 * The merge function for our VBR matrix coefficients sorting.
2441 *
2442 * NOTE :This function is the mergesort bottleneck.
2443 */
2444 register int left_index=0, right_index=0, result_index=0;
2445
2446 /*
2447 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2448 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2449 |<- length ----->|
2450 ^- index
2451 */
2452
2453
2454 #define LEFT_ADVANCE left_index =( left_index+1); left_length-- ;
2455 #define RIGHT_ADVANCE right_index=(right_index+1); right_length--;
2456 #define RESULT_ADVANCE result_index =( result_index+1);
2457
2458 #define RESULT_APPEND(IEL,JEL,BIEL,BJEL,EL) \
2459 iresult[result_index]=(IEL); \
2460 jresult[result_index]=(JEL); \
2461 result[result_index]=( EL); \
2462 biresult[result_index]=(BIEL); \
2463 bjresult[result_index]=(BJEL); \
2464 RESULT_ADVANCE;
2465
2466 #define LRESULT_APPEND \
2467 iresult[result_index]=ileft[left_index];\
2468 jresult[result_index]=jleft[left_index];\
2469 biresult[result_index]=bileft[left_index]; \
2470 bjresult[result_index]=bjleft[left_index]; \
2471 result[ result_index]= left[left_index];\
2472 RESULT_ADVANCE; \
2473 LEFT_ADVANCE;
2474
2475 #define RRESULT_APPEND \
2476 iresult[result_index]=iright[right_index];\
2477 jresult[result_index]=jright[right_index];\
2478 biresult[result_index]=biright[right_index]; \
2479 bjresult[result_index]=bjright[right_index]; \
2480 result[result_index]= right[right_index];\
2481 RESULT_ADVANCE; \
2482 RIGHT_ADVANCE;
2483
2484 while( left_length > 0 && right_length > 0)
2485 if(
2486
2487 bileft[left_index] < biright[right_index] ||
2488 ( bileft[left_index] == biright[right_index] &&
2489 bjleft[left_index] <= bjright[right_index] )
2490 )
2491 {
2492 LRESULT_APPEND
2493 }
2494 else
2495 {
2496 RRESULT_APPEND
2497 }
2498
2499 while( left_length > 0 )
2500 {
2501 LRESULT_APPEND
2502 }
2503 while( right_length > 0 )
2504 {
2505 RRESULT_APPEND
2506 }
2507 #undef LEFT_ADVANCE
2508 #undef RIGHT_ADVANCE
2509 #undef RESULT_ADVANCE
2510 #undef RESULT_APPEND
2511 #undef LRESULT_APPEND
2512 #undef RRESULT_APPEND
2513
2514 }
2515
2516 #ifdef __cplusplus
2517 }
2518 #endif /* __cplusplus */
2519
2520 /* @endcond */
2521