1 
2 
3 /* @cond INNERDOC */
4 /**
5  * @file
6  * @brief
7  * Sorting functions.
8  */
9 
10 /*
11 
12 Copyright (C) 2008-2020 Michele Martone
13 
14 This file is part of librsb.
15 
16 librsb is free software; you can redistribute it and/or modify it
17 under the terms of the GNU Lesser General Public License as published
18 by the Free Software Foundation; either version 3 of the License, or
19 (at your option) any later version.
20 
21 librsb is distributed in the hope that it will be useful, but WITHOUT
22 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
24 License for more details.
25 
26 You should have received a copy of the GNU Lesser General Public
27 License along with librsb; see the file COPYING.
28 If not, see <http://www.gnu.org/licenses/>.
29 
30 */
31 /*
32  The code in this file was generated automatically by an M4 script.
33  It is not meant to be used as an API (Application Programming Interface).
34  p.s.: right now, only row major matrix access is considered.
35 
36  */
37 
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif /* __cplusplus */
42 
43 
44 
45 
46 #include "rsb.h"
47 #include "rsb_common.h"
48 #include "rsb_internals.h"
49 
50 
51 extern struct rsb_session_handle_t rsb_global_session_handle;
52 
rsb__do_mergesort_CSR(rsb_coo_idx_t * iarray,rsb_coo_idx_t * jarray,void * array,rsb_nnz_idx_t length,rsb_coo_idx_t * iresult,rsb_coo_idx_t * jresult,void * result,rsb_type_t type)53 rsb_err_t rsb__do_mergesort_CSR(
54 	rsb_coo_idx_t *iarray,
55 	rsb_coo_idx_t *jarray,
56 	void *array,
57 	rsb_nnz_idx_t length,
58 	rsb_coo_idx_t *iresult,
59 	rsb_coo_idx_t *jresult,
60 	void *result,
61 	rsb_type_t type)
62 {
63 	/*!
64 	 * \ingroup gr_util
65 	 *	This function will sort the non zero elements of a sparse matrix.
66 	 *      It will read row and column indices arrays, the values array,
67 	 *	and will sort them in separate output arrays.
68 	 *
69 	 * 	\param length  the input  arrays length
70 	 * 	\param iarray  the input  row    indices array
71 	 * 	\param jarray  the input  column indices array
72 	 * 	\param array   the input  mtype array
73 	 * 	\param iresult the output row    indices array
74 	 * 	\param jresult the output column indices array
75 	 * 	\param result  the output values array
76 	 *	FIXME : UNDOCUMENTED
77 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
78 	 *	criteria :
79 	 *
80 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
81 	 * 	i.e.: C (row major) ordering
82 	 */
83 
84 
85 	if(type == RSB_NUMERICAL_TYPE_DOUBLE )
86 	return rsb_do_mergesort_double_CSR(iarray, jarray,
87 array, length,
88 iresult, jresult,
89 result);
90 	else
91 	if(type == RSB_NUMERICAL_TYPE_FLOAT )
92 	return rsb_do_mergesort_float_CSR(iarray, jarray,
93 array, length,
94 iresult, jresult,
95 result);
96 	else
97 	if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
98 	return rsb_do_mergesort_float_complex_CSR(iarray, jarray,
99 array, length,
100 iresult, jresult,
101 result);
102 	else
103 	if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
104 	return rsb_do_mergesort_double_complex_CSR(iarray, jarray,
105 array, length,
106 iresult, jresult,
107 result);
108 	else
109 	return RSB_ERR_UNSUPPORTED_TYPE;
110 }
111 
rsb__do_mergesort_BCSR(rsb_coo_idx_t * iarray,rsb_coo_idx_t * jarray,void * array,rsb_nnz_idx_t length,rsb_coo_idx_t mb,rsb_coo_idx_t kb,rsb_coo_idx_t * iresult,rsb_coo_idx_t * jresult,void * result,rsb_type_t type)112 rsb_err_t rsb__do_mergesort_BCSR(
113 	rsb_coo_idx_t *iarray,
114 	rsb_coo_idx_t *jarray,
115 	void *array,
116 	rsb_nnz_idx_t length,
117 	rsb_coo_idx_t mb,
118 	rsb_coo_idx_t kb,
119 	rsb_coo_idx_t *iresult,
120 	rsb_coo_idx_t *jresult,
121 	void *result,
122 	rsb_type_t type)
123 {
124 	/*!
125 	 * \ingroup gr_util
126 	 *	This function will sort the non zero elements of a sparse matrix.
127 	 *      It will read row and column indices arrays, the values array,
128 	 *	and will sort them in separate output arrays.
129 	 *
130 	 * 	\param length  the input  arrays length
131 	 * 	\param iarray  the input  row    indices array
132 	 * 	\param jarray  the input  column indices array
133 	 * 	\param array   the input  mtype array
134 	 * 	\param iresult the output row    indices array
135 	 * 	\param jresult the output column indices array
136 	 * 	\param result  the output values array
137 
138 	 *	FIXME : UNDOCUMENTED
139 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
140 	 *	criteria :
141 	 *
142 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
143 	 * 	i.e.: C (row major) ordering
144 	 */
145 
146 
147 	if(type == RSB_NUMERICAL_TYPE_DOUBLE )
148 	return rsb_do_mergesort_double_BCSR(iarray, jarray,
149 	mb,kb,array, length,
150 iresult, jresult,
151 result);
152 	else
153 	if(type == RSB_NUMERICAL_TYPE_FLOAT )
154 	return rsb_do_mergesort_float_BCSR(iarray, jarray,
155 	mb,kb,array, length,
156 iresult, jresult,
157 result);
158 	else
159 	if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
160 	return rsb_do_mergesort_float_complex_BCSR(iarray, jarray,
161 	mb,kb,array, length,
162 iresult, jresult,
163 result);
164 	else
165 	if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
166 	return rsb_do_mergesort_double_complex_BCSR(iarray, jarray,
167 	mb,kb,array, length,
168 iresult, jresult,
169 result);
170 	else
171 	return RSB_ERR_UNSUPPORTED_TYPE;
172 }
173 
rsb__do_mergesort_VBR(rsb_coo_idx_t * iarray,rsb_coo_idx_t * jarray,rsb_coo_idx_t * biarray,rsb_coo_idx_t * bjarray,void * array,rsb_nnz_idx_t length,rsb_coo_idx_t * iresult,rsb_coo_idx_t * jresult,rsb_coo_idx_t * biresult,rsb_coo_idx_t * bjresult,void * result,rsb_type_t type)174 rsb_err_t rsb__do_mergesort_VBR(
175 	rsb_coo_idx_t *iarray,
176 	rsb_coo_idx_t *jarray,
177 	rsb_coo_idx_t *biarray,
178 	rsb_coo_idx_t *bjarray,
179 	void *array,
180 	rsb_nnz_idx_t length,
181 	rsb_coo_idx_t *iresult,
182 	rsb_coo_idx_t *jresult,
183 	rsb_coo_idx_t *biresult,
184 	rsb_coo_idx_t *bjresult,
185 	void *result,
186 	rsb_type_t type)
187 {
188 	/*!
189 	 * \ingroup gr_util
190 	 *	This function will sort the non zero elements of a sparse blocked matrix.
191 	 *      It will read row and column indices arrays, the values array,
192 	 *	and will sort them in separate output arrays.
193 	 *
194 	 * 	\param length  the input  arrays length
195 	 * 	\param iarray  the input  row    indices array
196 	 * 	\param jarray  the input  column indices array
197 	 * 	\param array   the input  mtype array
198 	 * 	\param iresult the output row    indices array
199 	 * 	\param jresult the output column indices array
200 	 * 	\param result  the output values array
201 	 * 	\param biarray  the input  block row    indices array
202 	 * 	\param bjarray  the input  block column indices array
203 	 * 	\param biresult the output block row    indices array
204 	 * 	\param bjresult the output block column indices array
205 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
206 	 *	criteria :
207 	 *
208 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
209 	 * 	i.e.: C (row major) ordering
210 	 */
211 
212 
213 	if(type == RSB_NUMERICAL_TYPE_DOUBLE )
214 	return rsb_do_mergesort_double_VBR(iarray, jarray,
215 	biarray,bjarray,array, length,
216 iresult, jresult,
217 	biresult,bjresult,result);
218 	else
219 	if(type == RSB_NUMERICAL_TYPE_FLOAT )
220 	return rsb_do_mergesort_float_VBR(iarray, jarray,
221 	biarray,bjarray,array, length,
222 iresult, jresult,
223 	biresult,bjresult,result);
224 	else
225 	if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
226 	return rsb_do_mergesort_float_complex_VBR(iarray, jarray,
227 	biarray,bjarray,array, length,
228 iresult, jresult,
229 	biresult,bjresult,result);
230 	else
231 	if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
232 	return rsb_do_mergesort_double_complex_VBR(iarray, jarray,
233 	biarray,bjarray,array, length,
234 iresult, jresult,
235 	biresult,bjresult,result);
236 	else
237 	return RSB_ERR_UNSUPPORTED_TYPE;
238 }
239 
rsb_do_mergesort_double_CSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,double * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,double * restrict result)240 rsb_err_t rsb_do_mergesort_double_CSR(
241 	rsb_coo_idx_t *restrict iarray,
242 	rsb_coo_idx_t *restrict jarray,
243 	double *array,
244 	rsb_nnz_idx_t length,
245 	rsb_coo_idx_t *restrict iresult,
246 	rsb_coo_idx_t *restrict jresult,
247 	double *restrict result)
248 
249 {
250 	/*!
251 	 *	\ingroup gr_util
252 	 *	This function will sort the non zero elements of a sparse blocked
253 	 *      double matrix.
254 	 *      It will read row and column indices arrays, the values array,
255 	 *	and will sort them in separate output arrays.
256 	 *
257 	 *	NOTE : This function could be optimized.
258 	 *
259 	 * 	\param iarray  the input  row    indices array
260 	 * 	\param jarray  the input  column indices array
261 	 * 	\param array   the input  double array
262 	 * 	\param iresult the output row    indices array
263 	 * 	\param jresult the output column indices array
264 	 * 	\param result  the output values array
265 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
266 	 *	criteria :
267 	 *
268 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
269 	 * 	i.e.: C (row major) ordering
270 	 */
271 
272 	rsb_nnz_idx_t middle;
273 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
274 
275 	rsb_coo_idx_t * ileft  ;
276 	rsb_coo_idx_t * iright ;
277 	rsb_coo_idx_t * jleft  ;
278 	rsb_coo_idx_t * jright ;
279 	size_t tn=0;
280 	double * left  ;
281 	double * right ;
282 
283 #define LIMIT 1
284 	if(length==LIMIT)
285 	{
286 		*iresult = *iarray;
287 		*jresult = *jarray;
288 		*(double*)result = *(double*)array;
289 	}
290 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
291 #undef LIMIT
292 	middle = length/2;
293 
294 	left  = array;
295 	right  = array+middle;
296 	ileft  = iarray;
297 	jleft  = jarray;
298 	iright = iarray+middle;
299 	jright = jarray+middle;
300 
301 /* 20121016 commented out omp usage because broke serial compilation  */
302 	{
303 	rsb_do_mergesort_double_CSR
304 	( ileft, jleft,
305 		left,   middle,
306 	        iresult  ,       jresult,
307 		result         );
308 
309 	if(tn==1)
310 	rsb_do_mergesort_double_CSR
311 	(iright, jright,
312 		right, length-middle,  iresult+middle  ,jresult+middle,
313 	((double*)result)+middle  );
314 	}
315 
316 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
317 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
318 	RSB_MEMCPY(  left, result       ,sizeof(double)*middle);
319 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
320 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
321 	RSB_MEMCPY( right, ((double*)result)+middle ,sizeof(double)*(length-middle));
322 
323 	rsb_do_merge_double_CSR		(
324 			ileft,iright,iresult,
325 			jleft,jright,jresult,
326 
327 			left, right, result,
328 			middle,length-middle
329 			);
330 	return RSB_ERR_NO_ERROR;	/* ! */
331 }
332 
333 
334 
rsb_do_merge_double_CSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const double * left,const double * restrict right,double * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)335 void rsb_do_merge_double_CSR(
336 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
337 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
338 		const double* left, const double* restrict right,  double* restrict result,
339 		rsb_nnz_idx_t left_length,
340 		rsb_nnz_idx_t right_length )
341 
342 
343 {
344 	/*!
345 	 * \ingroup gr_util
346 	 * The merge function for our CSR matrix coefficients sorting.
347 	 *
348 	 * NOTE :This function is the mergesort bottleneck.
349 	 */
350 	register int left_index=0, right_index=0, result_index=0;
351 
352 	/*
353 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
354 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
355 	          |<-  length ----->|
356 	          ^- index
357 	 */
358 
359 
360 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
361 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
362 #define RESULT_ADVANCE		result_index =( result_index+1);
363 
364 #define RESULT_APPEND(IEL,JEL,EL)	\
365 	iresult[result_index]=(IEL);  \
366 	jresult[result_index]=(JEL);  \
367 	result[result_index]=( EL);  \
368 	RESULT_ADVANCE;
369 
370 #define LRESULT_APPEND	\
371 	iresult[result_index]=ileft[left_index];\
372 	jresult[result_index]=jleft[left_index];\
373 	result[ result_index]= left[left_index];\
374 	RESULT_ADVANCE; \
375 	LEFT_ADVANCE;
376 
377 #define RRESULT_APPEND	\
378 	iresult[result_index]=iright[right_index];\
379 	jresult[result_index]=jright[right_index];\
380 	 result[result_index]= right[right_index];\
381 	RESULT_ADVANCE; \
382 	RIGHT_ADVANCE;
383 
384 	while( left_length > 0 && right_length > 0)
385 	if(
386 
387 		ileft[left_index] < iright[right_index] ||
388 		(	ileft[left_index] == iright[right_index]	&&
389 			jleft[left_index] <= jright[right_index]	)
390 		)
391 				{
392 		LRESULT_APPEND
393 	}
394 	else
395 	{
396 		RRESULT_APPEND
397 	}
398 
399 	while( left_length  > 0 )
400 	{
401 		LRESULT_APPEND
402 	}
403 	while( right_length  > 0 )
404 	{
405 		RRESULT_APPEND
406 	}
407 #undef LEFT_ADVANCE
408 #undef RIGHT_ADVANCE
409 #undef RESULT_ADVANCE
410 #undef RESULT_APPEND
411 #undef LRESULT_APPEND
412 #undef RRESULT_APPEND
413 
414 }
415 
rsb_do_mergesort_float_CSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,float * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,float * restrict result)416 rsb_err_t rsb_do_mergesort_float_CSR(
417 	rsb_coo_idx_t *restrict iarray,
418 	rsb_coo_idx_t *restrict jarray,
419 	float *array,
420 	rsb_nnz_idx_t length,
421 	rsb_coo_idx_t *restrict iresult,
422 	rsb_coo_idx_t *restrict jresult,
423 	float *restrict result)
424 
425 {
426 	/*!
427 	 *	\ingroup gr_util
428 	 *	This function will sort the non zero elements of a sparse blocked
429 	 *      float matrix.
430 	 *      It will read row and column indices arrays, the values array,
431 	 *	and will sort them in separate output arrays.
432 	 *
433 	 *	NOTE : This function could be optimized.
434 	 *
435 	 * 	\param iarray  the input  row    indices array
436 	 * 	\param jarray  the input  column indices array
437 	 * 	\param array   the input  float array
438 	 * 	\param iresult the output row    indices array
439 	 * 	\param jresult the output column indices array
440 	 * 	\param result  the output values array
441 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
442 	 *	criteria :
443 	 *
444 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
445 	 * 	i.e.: C (row major) ordering
446 	 */
447 
448 	rsb_nnz_idx_t middle;
449 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
450 
451 	rsb_coo_idx_t * ileft  ;
452 	rsb_coo_idx_t * iright ;
453 	rsb_coo_idx_t * jleft  ;
454 	rsb_coo_idx_t * jright ;
455 	size_t tn=0;
456 	float * left  ;
457 	float * right ;
458 
459 #define LIMIT 1
460 	if(length==LIMIT)
461 	{
462 		*iresult = *iarray;
463 		*jresult = *jarray;
464 		*(float*)result = *(float*)array;
465 	}
466 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
467 #undef LIMIT
468 	middle = length/2;
469 
470 	left  = array;
471 	right  = array+middle;
472 	ileft  = iarray;
473 	jleft  = jarray;
474 	iright = iarray+middle;
475 	jright = jarray+middle;
476 
477 /* 20121016 commented out omp usage because broke serial compilation  */
478 	{
479 	rsb_do_mergesort_float_CSR
480 	( ileft, jleft,
481 		left,   middle,
482 	        iresult  ,       jresult,
483 		result         );
484 
485 	if(tn==1)
486 	rsb_do_mergesort_float_CSR
487 	(iright, jright,
488 		right, length-middle,  iresult+middle  ,jresult+middle,
489 	((float*)result)+middle  );
490 	}
491 
492 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
493 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
494 	RSB_MEMCPY(  left, result       ,sizeof(float)*middle);
495 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
496 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
497 	RSB_MEMCPY( right, ((float*)result)+middle ,sizeof(float)*(length-middle));
498 
499 	rsb_do_merge_float_CSR		(
500 			ileft,iright,iresult,
501 			jleft,jright,jresult,
502 
503 			left, right, result,
504 			middle,length-middle
505 			);
506 	return RSB_ERR_NO_ERROR;	/* ! */
507 }
508 
509 
510 
rsb_do_merge_float_CSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const float * left,const float * restrict right,float * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)511 void rsb_do_merge_float_CSR(
512 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
513 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
514 		const float* left, const float* restrict right,  float* restrict result,
515 		rsb_nnz_idx_t left_length,
516 		rsb_nnz_idx_t right_length )
517 
518 
519 {
520 	/*!
521 	 * \ingroup gr_util
522 	 * The merge function for our CSR matrix coefficients sorting.
523 	 *
524 	 * NOTE :This function is the mergesort bottleneck.
525 	 */
526 	register int left_index=0, right_index=0, result_index=0;
527 
528 	/*
529 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
530 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
531 	          |<-  length ----->|
532 	          ^- index
533 	 */
534 
535 
536 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
537 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
538 #define RESULT_ADVANCE		result_index =( result_index+1);
539 
540 #define RESULT_APPEND(IEL,JEL,EL)	\
541 	iresult[result_index]=(IEL);  \
542 	jresult[result_index]=(JEL);  \
543 	result[result_index]=( EL);  \
544 	RESULT_ADVANCE;
545 
546 #define LRESULT_APPEND	\
547 	iresult[result_index]=ileft[left_index];\
548 	jresult[result_index]=jleft[left_index];\
549 	result[ result_index]= left[left_index];\
550 	RESULT_ADVANCE; \
551 	LEFT_ADVANCE;
552 
553 #define RRESULT_APPEND	\
554 	iresult[result_index]=iright[right_index];\
555 	jresult[result_index]=jright[right_index];\
556 	 result[result_index]= right[right_index];\
557 	RESULT_ADVANCE; \
558 	RIGHT_ADVANCE;
559 
560 	while( left_length > 0 && right_length > 0)
561 	if(
562 
563 		ileft[left_index] < iright[right_index] ||
564 		(	ileft[left_index] == iright[right_index]	&&
565 			jleft[left_index] <= jright[right_index]	)
566 		)
567 				{
568 		LRESULT_APPEND
569 	}
570 	else
571 	{
572 		RRESULT_APPEND
573 	}
574 
575 	while( left_length  > 0 )
576 	{
577 		LRESULT_APPEND
578 	}
579 	while( right_length  > 0 )
580 	{
581 		RRESULT_APPEND
582 	}
583 #undef LEFT_ADVANCE
584 #undef RIGHT_ADVANCE
585 #undef RESULT_ADVANCE
586 #undef RESULT_APPEND
587 #undef LRESULT_APPEND
588 #undef RRESULT_APPEND
589 
590 }
591 
rsb_do_mergesort_float_complex_CSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,float complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,float complex * restrict result)592 rsb_err_t rsb_do_mergesort_float_complex_CSR(
593 	rsb_coo_idx_t *restrict iarray,
594 	rsb_coo_idx_t *restrict jarray,
595 	float complex *array,
596 	rsb_nnz_idx_t length,
597 	rsb_coo_idx_t *restrict iresult,
598 	rsb_coo_idx_t *restrict jresult,
599 	float complex *restrict result)
600 
601 {
602 	/*!
603 	 *	\ingroup gr_util
604 	 *	This function will sort the non zero elements of a sparse blocked
605 	 *      float complex matrix.
606 	 *      It will read row and column indices arrays, the values array,
607 	 *	and will sort them in separate output arrays.
608 	 *
609 	 *	NOTE : This function could be optimized.
610 	 *
611 	 * 	\param iarray  the input  row    indices array
612 	 * 	\param jarray  the input  column indices array
613 	 * 	\param array   the input  float complex array
614 	 * 	\param iresult the output row    indices array
615 	 * 	\param jresult the output column indices array
616 	 * 	\param result  the output values array
617 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
618 	 *	criteria :
619 	 *
620 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
621 	 * 	i.e.: C (row major) ordering
622 	 */
623 
624 	rsb_nnz_idx_t middle;
625 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
626 
627 	rsb_coo_idx_t * ileft  ;
628 	rsb_coo_idx_t * iright ;
629 	rsb_coo_idx_t * jleft  ;
630 	rsb_coo_idx_t * jright ;
631 	size_t tn=0;
632 	float complex * left  ;
633 	float complex * right ;
634 
635 #define LIMIT 1
636 	if(length==LIMIT)
637 	{
638 		*iresult = *iarray;
639 		*jresult = *jarray;
640 		*(float complex*)result = *(float complex*)array;
641 	}
642 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
643 #undef LIMIT
644 	middle = length/2;
645 
646 	left  = array;
647 	right  = array+middle;
648 	ileft  = iarray;
649 	jleft  = jarray;
650 	iright = iarray+middle;
651 	jright = jarray+middle;
652 
653 /* 20121016 commented out omp usage because broke serial compilation  */
654 	{
655 	rsb_do_mergesort_float_complex_CSR
656 	( ileft, jleft,
657 		left,   middle,
658 	        iresult  ,       jresult,
659 		result         );
660 
661 	if(tn==1)
662 	rsb_do_mergesort_float_complex_CSR
663 	(iright, jright,
664 		right, length-middle,  iresult+middle  ,jresult+middle,
665 	((float complex*)result)+middle  );
666 	}
667 
668 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
669 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
670 	RSB_MEMCPY(  left, result       ,sizeof(float complex)*middle);
671 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
672 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
673 	RSB_MEMCPY( right, ((float complex*)result)+middle ,sizeof(float complex)*(length-middle));
674 
675 	rsb_do_merge_float_complex_CSR		(
676 			ileft,iright,iresult,
677 			jleft,jright,jresult,
678 
679 			left, right, result,
680 			middle,length-middle
681 			);
682 	return RSB_ERR_NO_ERROR;	/* ! */
683 }
684 
685 
686 
rsb_do_merge_float_complex_CSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const float complex * left,const float complex * restrict right,float complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)687 void rsb_do_merge_float_complex_CSR(
688 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
689 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
690 		const float complex* left, const float complex* restrict right,  float complex* restrict result,
691 		rsb_nnz_idx_t left_length,
692 		rsb_nnz_idx_t right_length )
693 
694 
695 {
696 	/*!
697 	 * \ingroup gr_util
698 	 * The merge function for our CSR matrix coefficients sorting.
699 	 *
700 	 * NOTE :This function is the mergesort bottleneck.
701 	 */
702 	register int left_index=0, right_index=0, result_index=0;
703 
704 	/*
705 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
706 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
707 	          |<-  length ----->|
708 	          ^- index
709 	 */
710 
711 
712 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
713 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
714 #define RESULT_ADVANCE		result_index =( result_index+1);
715 
716 #define RESULT_APPEND(IEL,JEL,EL)	\
717 	iresult[result_index]=(IEL);  \
718 	jresult[result_index]=(JEL);  \
719 	result[result_index]=( EL);  \
720 	RESULT_ADVANCE;
721 
722 #define LRESULT_APPEND	\
723 	iresult[result_index]=ileft[left_index];\
724 	jresult[result_index]=jleft[left_index];\
725 	result[ result_index]= left[left_index];\
726 	RESULT_ADVANCE; \
727 	LEFT_ADVANCE;
728 
729 #define RRESULT_APPEND	\
730 	iresult[result_index]=iright[right_index];\
731 	jresult[result_index]=jright[right_index];\
732 	 result[result_index]= right[right_index];\
733 	RESULT_ADVANCE; \
734 	RIGHT_ADVANCE;
735 
736 	while( left_length > 0 && right_length > 0)
737 	if(
738 
739 		ileft[left_index] < iright[right_index] ||
740 		(	ileft[left_index] == iright[right_index]	&&
741 			jleft[left_index] <= jright[right_index]	)
742 		)
743 				{
744 		LRESULT_APPEND
745 	}
746 	else
747 	{
748 		RRESULT_APPEND
749 	}
750 
751 	while( left_length  > 0 )
752 	{
753 		LRESULT_APPEND
754 	}
755 	while( right_length  > 0 )
756 	{
757 		RRESULT_APPEND
758 	}
759 #undef LEFT_ADVANCE
760 #undef RIGHT_ADVANCE
761 #undef RESULT_ADVANCE
762 #undef RESULT_APPEND
763 #undef LRESULT_APPEND
764 #undef RRESULT_APPEND
765 
766 }
767 
rsb_do_mergesort_double_complex_CSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,double complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,double complex * restrict result)768 rsb_err_t rsb_do_mergesort_double_complex_CSR(
769 	rsb_coo_idx_t *restrict iarray,
770 	rsb_coo_idx_t *restrict jarray,
771 	double complex *array,
772 	rsb_nnz_idx_t length,
773 	rsb_coo_idx_t *restrict iresult,
774 	rsb_coo_idx_t *restrict jresult,
775 	double complex *restrict result)
776 
777 {
778 	/*!
779 	 *	\ingroup gr_util
780 	 *	This function will sort the non zero elements of a sparse blocked
781 	 *      double complex matrix.
782 	 *      It will read row and column indices arrays, the values array,
783 	 *	and will sort them in separate output arrays.
784 	 *
785 	 *	NOTE : This function could be optimized.
786 	 *
787 	 * 	\param iarray  the input  row    indices array
788 	 * 	\param jarray  the input  column indices array
789 	 * 	\param array   the input  double complex array
790 	 * 	\param iresult the output row    indices array
791 	 * 	\param jresult the output column indices array
792 	 * 	\param result  the output values array
793 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
794 	 *	criteria :
795 	 *
796 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
797 	 * 	i.e.: C (row major) ordering
798 	 */
799 
800 	rsb_nnz_idx_t middle;
801 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
802 
803 	rsb_coo_idx_t * ileft  ;
804 	rsb_coo_idx_t * iright ;
805 	rsb_coo_idx_t * jleft  ;
806 	rsb_coo_idx_t * jright ;
807 	size_t tn=0;
808 	double complex * left  ;
809 	double complex * right ;
810 
811 #define LIMIT 1
812 	if(length==LIMIT)
813 	{
814 		*iresult = *iarray;
815 		*jresult = *jarray;
816 		*(double complex*)result = *(double complex*)array;
817 	}
818 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
819 #undef LIMIT
820 	middle = length/2;
821 
822 	left  = array;
823 	right  = array+middle;
824 	ileft  = iarray;
825 	jleft  = jarray;
826 	iright = iarray+middle;
827 	jright = jarray+middle;
828 
829 /* 20121016 commented out omp usage because broke serial compilation  */
830 	{
831 	rsb_do_mergesort_double_complex_CSR
832 	( ileft, jleft,
833 		left,   middle,
834 	        iresult  ,       jresult,
835 		result         );
836 
837 	if(tn==1)
838 	rsb_do_mergesort_double_complex_CSR
839 	(iright, jright,
840 		right, length-middle,  iresult+middle  ,jresult+middle,
841 	((double complex*)result)+middle  );
842 	}
843 
844 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
845 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
846 	RSB_MEMCPY(  left, result       ,sizeof(double complex)*middle);
847 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
848 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
849 	RSB_MEMCPY( right, ((double complex*)result)+middle ,sizeof(double complex)*(length-middle));
850 
851 	rsb_do_merge_double_complex_CSR		(
852 			ileft,iright,iresult,
853 			jleft,jright,jresult,
854 
855 			left, right, result,
856 			middle,length-middle
857 			);
858 	return RSB_ERR_NO_ERROR;	/* ! */
859 }
860 
861 
862 
rsb_do_merge_double_complex_CSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const double complex * left,const double complex * restrict right,double complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)863 void rsb_do_merge_double_complex_CSR(
864 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
865 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
866 		const double complex* left, const double complex* restrict right,  double complex* restrict result,
867 		rsb_nnz_idx_t left_length,
868 		rsb_nnz_idx_t right_length )
869 
870 
871 {
872 	/*!
873 	 * \ingroup gr_util
874 	 * The merge function for our CSR matrix coefficients sorting.
875 	 *
876 	 * NOTE :This function is the mergesort bottleneck.
877 	 */
878 	register int left_index=0, right_index=0, result_index=0;
879 
880 	/*
881 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
882 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
883 	          |<-  length ----->|
884 	          ^- index
885 	 */
886 
887 
888 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
889 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
890 #define RESULT_ADVANCE		result_index =( result_index+1);
891 
892 #define RESULT_APPEND(IEL,JEL,EL)	\
893 	iresult[result_index]=(IEL);  \
894 	jresult[result_index]=(JEL);  \
895 	result[result_index]=( EL);  \
896 	RESULT_ADVANCE;
897 
898 #define LRESULT_APPEND	\
899 	iresult[result_index]=ileft[left_index];\
900 	jresult[result_index]=jleft[left_index];\
901 	result[ result_index]= left[left_index];\
902 	RESULT_ADVANCE; \
903 	LEFT_ADVANCE;
904 
905 #define RRESULT_APPEND	\
906 	iresult[result_index]=iright[right_index];\
907 	jresult[result_index]=jright[right_index];\
908 	 result[result_index]= right[right_index];\
909 	RESULT_ADVANCE; \
910 	RIGHT_ADVANCE;
911 
912 	while( left_length > 0 && right_length > 0)
913 	if(
914 
915 		ileft[left_index] < iright[right_index] ||
916 		(	ileft[left_index] == iright[right_index]	&&
917 			jleft[left_index] <= jright[right_index]	)
918 		)
919 				{
920 		LRESULT_APPEND
921 	}
922 	else
923 	{
924 		RRESULT_APPEND
925 	}
926 
927 	while( left_length  > 0 )
928 	{
929 		LRESULT_APPEND
930 	}
931 	while( right_length  > 0 )
932 	{
933 		RRESULT_APPEND
934 	}
935 #undef LEFT_ADVANCE
936 #undef RIGHT_ADVANCE
937 #undef RESULT_ADVANCE
938 #undef RESULT_APPEND
939 #undef LRESULT_APPEND
940 #undef RRESULT_APPEND
941 
942 }
943 
rsb_do_mergesort_double_BCSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t mb,rsb_coo_idx_t kb,double * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,double * restrict result)944 rsb_err_t rsb_do_mergesort_double_BCSR(
945 	rsb_coo_idx_t *restrict iarray,
946 	rsb_coo_idx_t *restrict jarray,
947 	rsb_coo_idx_t mb, rsb_coo_idx_t kb,
948 	double *array,
949 	rsb_nnz_idx_t length,
950 	rsb_coo_idx_t *restrict iresult,
951 	rsb_coo_idx_t *restrict jresult,
952 	double *restrict result)
953 
954 {
955 	/*!
956 	 *	\ingroup gr_util
957 	 *	This function will sort the non zero elements of a sparse blocked
958 	 *      double matrix.
959 	 *      It will read row and column indices arrays, the values array,
960 	 *	and will sort them in separate output arrays.
961 	 *
962 	 *	NOTE : This function could be optimized.
963 	 *
964 	 * 	\param iarray  the input  row    indices array
965 	 * 	\param jarray  the input  column indices array
966 	 * 	\param array   the input  double array
967 	 * 	\param iresult the output row    indices array
968 	 * 	\param jresult the output column indices array
969 	 * 	\param result  the output values array
970 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
971 	 *	criteria :
972 	 *
973 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
974 	 * 	i.e.: C (row major) ordering
975 	 */
976 
977 	rsb_nnz_idx_t middle;
978 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
979 
980 	rsb_coo_idx_t * ileft  ;
981 	rsb_coo_idx_t * iright ;
982 	rsb_coo_idx_t * jleft  ;
983 	rsb_coo_idx_t * jright ;
984 	size_t tn=0;
985 	double * left  ;
986 	double * right ;
987 
988 #define LIMIT 1
989 	if(length==LIMIT)
990 	{
991 		*iresult = *iarray;
992 		*jresult = *jarray;
993 		*(double*)result = *(double*)array;
994 	}
995 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
996 #undef LIMIT
997 	middle = length/2;
998 
999 	left  = array;
1000 	right  = array+middle;
1001 	ileft  = iarray;
1002 	jleft  = jarray;
1003 	iright = iarray+middle;
1004 	jright = jarray+middle;
1005 
1006 /* 20121016 commented out omp usage because broke serial compilation  */
1007 	{
1008 	rsb_do_mergesort_double_BCSR
1009 	( ileft, jleft,
1010  mb, kb, 		left,   middle,
1011 	        iresult  ,       jresult,
1012 		result         );
1013 
1014 	if(tn==1)
1015 	rsb_do_mergesort_double_BCSR
1016 	(iright, jright,
1017  mb, kb, 		right, length-middle,  iresult+middle  ,jresult+middle,
1018 	((double*)result)+middle  );
1019 	}
1020 
1021 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
1022 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
1023 	RSB_MEMCPY(  left, result       ,sizeof(double)*middle);
1024 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1025 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1026 	RSB_MEMCPY( right, ((double*)result)+middle ,sizeof(double)*(length-middle));
1027 
1028 	rsb_do_merge_double_BCSR		(
1029 			ileft,iright,iresult,
1030 			jleft,jright,jresult,
1031 	mb,kb,
1032 			left, right, result,
1033 			middle,length-middle
1034 			);
1035 	return RSB_ERR_NO_ERROR;	/* ! */
1036 }
1037 
1038 
1039 
rsb_do_merge_double_BCSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t mb,const rsb_coo_idx_t kb,const double * left,const double * restrict right,double * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1040 void rsb_do_merge_double_BCSR(
1041 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
1042 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
1043 const rsb_coo_idx_t mb, const rsb_coo_idx_t kb,		const double* left, const double* restrict right,  double* restrict result,
1044 		rsb_nnz_idx_t left_length,
1045 		rsb_nnz_idx_t right_length )
1046 
1047 
1048 {
1049 	/*!
1050 	 * \ingroup gr_util
1051 	 * The merge function for our BCSR matrix coefficients sorting.
1052 	 *
1053 	 * NOTE :This function is the mergesort bottleneck.
1054 	 */
1055 	register int left_index=0, right_index=0, result_index=0;
1056 
1057 	/*
1058 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1059 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1060 	          |<-  length ----->|
1061 	          ^- index
1062 	 */
1063 
1064 
1065 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
1066 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
1067 #define RESULT_ADVANCE		result_index =( result_index+1);
1068 
1069 #define RESULT_APPEND(IEL,JEL,EL)	\
1070 	iresult[result_index]=(IEL);  \
1071 	jresult[result_index]=(JEL);  \
1072 	result[result_index]=( EL);  \
1073 	RESULT_ADVANCE;
1074 
1075 #define LRESULT_APPEND	\
1076 	iresult[result_index]=ileft[left_index];\
1077 	jresult[result_index]=jleft[left_index];\
1078 	result[ result_index]= left[left_index];\
1079 	RESULT_ADVANCE; \
1080 	LEFT_ADVANCE;
1081 
1082 #define RRESULT_APPEND	\
1083 	iresult[result_index]=iright[right_index];\
1084 	jresult[result_index]=jright[right_index];\
1085 	 result[result_index]= right[right_index];\
1086 	RESULT_ADVANCE; \
1087 	RIGHT_ADVANCE;
1088 
1089 	while( left_length > 0 && right_length > 0)
1090 	if(
1091 
1092 		ileft[left_index]/mb < iright[right_index]/mb ||
1093 		(	ileft[left_index]/mb == iright[right_index]/mb	&&
1094 			jleft[left_index]/kb <= jright[right_index]/kb	)
1095 		)
1096 			{
1097 		LRESULT_APPEND
1098 	}
1099 	else
1100 	{
1101 		RRESULT_APPEND
1102 	}
1103 
1104 	while( left_length  > 0 )
1105 	{
1106 		LRESULT_APPEND
1107 	}
1108 	while( right_length  > 0 )
1109 	{
1110 		RRESULT_APPEND
1111 	}
1112 #undef LEFT_ADVANCE
1113 #undef RIGHT_ADVANCE
1114 #undef RESULT_ADVANCE
1115 #undef RESULT_APPEND
1116 #undef LRESULT_APPEND
1117 #undef RRESULT_APPEND
1118 
1119 }
1120 
rsb_do_mergesort_float_BCSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t mb,rsb_coo_idx_t kb,float * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,float * restrict result)1121 rsb_err_t rsb_do_mergesort_float_BCSR(
1122 	rsb_coo_idx_t *restrict iarray,
1123 	rsb_coo_idx_t *restrict jarray,
1124 	rsb_coo_idx_t mb, rsb_coo_idx_t kb,
1125 	float *array,
1126 	rsb_nnz_idx_t length,
1127 	rsb_coo_idx_t *restrict iresult,
1128 	rsb_coo_idx_t *restrict jresult,
1129 	float *restrict result)
1130 
1131 {
1132 	/*!
1133 	 *	\ingroup gr_util
1134 	 *	This function will sort the non zero elements of a sparse blocked
1135 	 *      float matrix.
1136 	 *      It will read row and column indices arrays, the values array,
1137 	 *	and will sort them in separate output arrays.
1138 	 *
1139 	 *	NOTE : This function could be optimized.
1140 	 *
1141 	 * 	\param iarray  the input  row    indices array
1142 	 * 	\param jarray  the input  column indices array
1143 	 * 	\param array   the input  float array
1144 	 * 	\param iresult the output row    indices array
1145 	 * 	\param jresult the output column indices array
1146 	 * 	\param result  the output values array
1147 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
1148 	 *	criteria :
1149 	 *
1150 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1151 	 * 	i.e.: C (row major) ordering
1152 	 */
1153 
1154 	rsb_nnz_idx_t middle;
1155 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1156 
1157 	rsb_coo_idx_t * ileft  ;
1158 	rsb_coo_idx_t * iright ;
1159 	rsb_coo_idx_t * jleft  ;
1160 	rsb_coo_idx_t * jright ;
1161 	size_t tn=0;
1162 	float * left  ;
1163 	float * right ;
1164 
1165 #define LIMIT 1
1166 	if(length==LIMIT)
1167 	{
1168 		*iresult = *iarray;
1169 		*jresult = *jarray;
1170 		*(float*)result = *(float*)array;
1171 	}
1172 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1173 #undef LIMIT
1174 	middle = length/2;
1175 
1176 	left  = array;
1177 	right  = array+middle;
1178 	ileft  = iarray;
1179 	jleft  = jarray;
1180 	iright = iarray+middle;
1181 	jright = jarray+middle;
1182 
1183 /* 20121016 commented out omp usage because broke serial compilation  */
1184 	{
1185 	rsb_do_mergesort_float_BCSR
1186 	( ileft, jleft,
1187  mb, kb, 		left,   middle,
1188 	        iresult  ,       jresult,
1189 		result         );
1190 
1191 	if(tn==1)
1192 	rsb_do_mergesort_float_BCSR
1193 	(iright, jright,
1194  mb, kb, 		right, length-middle,  iresult+middle  ,jresult+middle,
1195 	((float*)result)+middle  );
1196 	}
1197 
1198 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
1199 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
1200 	RSB_MEMCPY(  left, result       ,sizeof(float)*middle);
1201 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1202 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1203 	RSB_MEMCPY( right, ((float*)result)+middle ,sizeof(float)*(length-middle));
1204 
1205 	rsb_do_merge_float_BCSR		(
1206 			ileft,iright,iresult,
1207 			jleft,jright,jresult,
1208 	mb,kb,
1209 			left, right, result,
1210 			middle,length-middle
1211 			);
1212 	return RSB_ERR_NO_ERROR;	/* ! */
1213 }
1214 
1215 
1216 
rsb_do_merge_float_BCSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t mb,const rsb_coo_idx_t kb,const float * left,const float * restrict right,float * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1217 void rsb_do_merge_float_BCSR(
1218 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
1219 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
1220 const rsb_coo_idx_t mb, const rsb_coo_idx_t kb,		const float* left, const float* restrict right,  float* restrict result,
1221 		rsb_nnz_idx_t left_length,
1222 		rsb_nnz_idx_t right_length )
1223 
1224 
1225 {
1226 	/*!
1227 	 * \ingroup gr_util
1228 	 * The merge function for our BCSR matrix coefficients sorting.
1229 	 *
1230 	 * NOTE :This function is the mergesort bottleneck.
1231 	 */
1232 	register int left_index=0, right_index=0, result_index=0;
1233 
1234 	/*
1235 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1236 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1237 	          |<-  length ----->|
1238 	          ^- index
1239 	 */
1240 
1241 
1242 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
1243 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
1244 #define RESULT_ADVANCE		result_index =( result_index+1);
1245 
1246 #define RESULT_APPEND(IEL,JEL,EL)	\
1247 	iresult[result_index]=(IEL);  \
1248 	jresult[result_index]=(JEL);  \
1249 	result[result_index]=( EL);  \
1250 	RESULT_ADVANCE;
1251 
1252 #define LRESULT_APPEND	\
1253 	iresult[result_index]=ileft[left_index];\
1254 	jresult[result_index]=jleft[left_index];\
1255 	result[ result_index]= left[left_index];\
1256 	RESULT_ADVANCE; \
1257 	LEFT_ADVANCE;
1258 
1259 #define RRESULT_APPEND	\
1260 	iresult[result_index]=iright[right_index];\
1261 	jresult[result_index]=jright[right_index];\
1262 	 result[result_index]= right[right_index];\
1263 	RESULT_ADVANCE; \
1264 	RIGHT_ADVANCE;
1265 
1266 	while( left_length > 0 && right_length > 0)
1267 	if(
1268 
1269 		ileft[left_index]/mb < iright[right_index]/mb ||
1270 		(	ileft[left_index]/mb == iright[right_index]/mb	&&
1271 			jleft[left_index]/kb <= jright[right_index]/kb	)
1272 		)
1273 			{
1274 		LRESULT_APPEND
1275 	}
1276 	else
1277 	{
1278 		RRESULT_APPEND
1279 	}
1280 
1281 	while( left_length  > 0 )
1282 	{
1283 		LRESULT_APPEND
1284 	}
1285 	while( right_length  > 0 )
1286 	{
1287 		RRESULT_APPEND
1288 	}
1289 #undef LEFT_ADVANCE
1290 #undef RIGHT_ADVANCE
1291 #undef RESULT_ADVANCE
1292 #undef RESULT_APPEND
1293 #undef LRESULT_APPEND
1294 #undef RRESULT_APPEND
1295 
1296 }
1297 
rsb_do_mergesort_float_complex_BCSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t mb,rsb_coo_idx_t kb,float complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,float complex * restrict result)1298 rsb_err_t rsb_do_mergesort_float_complex_BCSR(
1299 	rsb_coo_idx_t *restrict iarray,
1300 	rsb_coo_idx_t *restrict jarray,
1301 	rsb_coo_idx_t mb, rsb_coo_idx_t kb,
1302 	float complex *array,
1303 	rsb_nnz_idx_t length,
1304 	rsb_coo_idx_t *restrict iresult,
1305 	rsb_coo_idx_t *restrict jresult,
1306 	float complex *restrict result)
1307 
1308 {
1309 	/*!
1310 	 *	\ingroup gr_util
1311 	 *	This function will sort the non zero elements of a sparse blocked
1312 	 *      float complex matrix.
1313 	 *      It will read row and column indices arrays, the values array,
1314 	 *	and will sort them in separate output arrays.
1315 	 *
1316 	 *	NOTE : This function could be optimized.
1317 	 *
1318 	 * 	\param iarray  the input  row    indices array
1319 	 * 	\param jarray  the input  column indices array
1320 	 * 	\param array   the input  float complex array
1321 	 * 	\param iresult the output row    indices array
1322 	 * 	\param jresult the output column indices array
1323 	 * 	\param result  the output values array
1324 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
1325 	 *	criteria :
1326 	 *
1327 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1328 	 * 	i.e.: C (row major) ordering
1329 	 */
1330 
1331 	rsb_nnz_idx_t middle;
1332 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1333 
1334 	rsb_coo_idx_t * ileft  ;
1335 	rsb_coo_idx_t * iright ;
1336 	rsb_coo_idx_t * jleft  ;
1337 	rsb_coo_idx_t * jright ;
1338 	size_t tn=0;
1339 	float complex * left  ;
1340 	float complex * right ;
1341 
1342 #define LIMIT 1
1343 	if(length==LIMIT)
1344 	{
1345 		*iresult = *iarray;
1346 		*jresult = *jarray;
1347 		*(float complex*)result = *(float complex*)array;
1348 	}
1349 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1350 #undef LIMIT
1351 	middle = length/2;
1352 
1353 	left  = array;
1354 	right  = array+middle;
1355 	ileft  = iarray;
1356 	jleft  = jarray;
1357 	iright = iarray+middle;
1358 	jright = jarray+middle;
1359 
1360 /* 20121016 commented out omp usage because broke serial compilation  */
1361 	{
1362 	rsb_do_mergesort_float_complex_BCSR
1363 	( ileft, jleft,
1364  mb, kb, 		left,   middle,
1365 	        iresult  ,       jresult,
1366 		result         );
1367 
1368 	if(tn==1)
1369 	rsb_do_mergesort_float_complex_BCSR
1370 	(iright, jright,
1371  mb, kb, 		right, length-middle,  iresult+middle  ,jresult+middle,
1372 	((float complex*)result)+middle  );
1373 	}
1374 
1375 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
1376 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
1377 	RSB_MEMCPY(  left, result       ,sizeof(float complex)*middle);
1378 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1379 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1380 	RSB_MEMCPY( right, ((float complex*)result)+middle ,sizeof(float complex)*(length-middle));
1381 
1382 	rsb_do_merge_float_complex_BCSR		(
1383 			ileft,iright,iresult,
1384 			jleft,jright,jresult,
1385 	mb,kb,
1386 			left, right, result,
1387 			middle,length-middle
1388 			);
1389 	return RSB_ERR_NO_ERROR;	/* ! */
1390 }
1391 
1392 
1393 
rsb_do_merge_float_complex_BCSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t mb,const rsb_coo_idx_t kb,const float complex * left,const float complex * restrict right,float complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1394 void rsb_do_merge_float_complex_BCSR(
1395 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
1396 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
1397 const rsb_coo_idx_t mb, const rsb_coo_idx_t kb,		const float complex* left, const float complex* restrict right,  float complex* restrict result,
1398 		rsb_nnz_idx_t left_length,
1399 		rsb_nnz_idx_t right_length )
1400 
1401 
1402 {
1403 	/*!
1404 	 * \ingroup gr_util
1405 	 * The merge function for our BCSR matrix coefficients sorting.
1406 	 *
1407 	 * NOTE :This function is the mergesort bottleneck.
1408 	 */
1409 	register int left_index=0, right_index=0, result_index=0;
1410 
1411 	/*
1412 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1413 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1414 	          |<-  length ----->|
1415 	          ^- index
1416 	 */
1417 
1418 
1419 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
1420 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
1421 #define RESULT_ADVANCE		result_index =( result_index+1);
1422 
1423 #define RESULT_APPEND(IEL,JEL,EL)	\
1424 	iresult[result_index]=(IEL);  \
1425 	jresult[result_index]=(JEL);  \
1426 	result[result_index]=( EL);  \
1427 	RESULT_ADVANCE;
1428 
1429 #define LRESULT_APPEND	\
1430 	iresult[result_index]=ileft[left_index];\
1431 	jresult[result_index]=jleft[left_index];\
1432 	result[ result_index]= left[left_index];\
1433 	RESULT_ADVANCE; \
1434 	LEFT_ADVANCE;
1435 
1436 #define RRESULT_APPEND	\
1437 	iresult[result_index]=iright[right_index];\
1438 	jresult[result_index]=jright[right_index];\
1439 	 result[result_index]= right[right_index];\
1440 	RESULT_ADVANCE; \
1441 	RIGHT_ADVANCE;
1442 
1443 	while( left_length > 0 && right_length > 0)
1444 	if(
1445 
1446 		ileft[left_index]/mb < iright[right_index]/mb ||
1447 		(	ileft[left_index]/mb == iright[right_index]/mb	&&
1448 			jleft[left_index]/kb <= jright[right_index]/kb	)
1449 		)
1450 			{
1451 		LRESULT_APPEND
1452 	}
1453 	else
1454 	{
1455 		RRESULT_APPEND
1456 	}
1457 
1458 	while( left_length  > 0 )
1459 	{
1460 		LRESULT_APPEND
1461 	}
1462 	while( right_length  > 0 )
1463 	{
1464 		RRESULT_APPEND
1465 	}
1466 #undef LEFT_ADVANCE
1467 #undef RIGHT_ADVANCE
1468 #undef RESULT_ADVANCE
1469 #undef RESULT_APPEND
1470 #undef LRESULT_APPEND
1471 #undef RRESULT_APPEND
1472 
1473 }
1474 
rsb_do_mergesort_double_complex_BCSR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t mb,rsb_coo_idx_t kb,double complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,double complex * restrict result)1475 rsb_err_t rsb_do_mergesort_double_complex_BCSR(
1476 	rsb_coo_idx_t *restrict iarray,
1477 	rsb_coo_idx_t *restrict jarray,
1478 	rsb_coo_idx_t mb, rsb_coo_idx_t kb,
1479 	double complex *array,
1480 	rsb_nnz_idx_t length,
1481 	rsb_coo_idx_t *restrict iresult,
1482 	rsb_coo_idx_t *restrict jresult,
1483 	double complex *restrict result)
1484 
1485 {
1486 	/*!
1487 	 *	\ingroup gr_util
1488 	 *	This function will sort the non zero elements of a sparse blocked
1489 	 *      double complex matrix.
1490 	 *      It will read row and column indices arrays, the values array,
1491 	 *	and will sort them in separate output arrays.
1492 	 *
1493 	 *	NOTE : This function could be optimized.
1494 	 *
1495 	 * 	\param iarray  the input  row    indices array
1496 	 * 	\param jarray  the input  column indices array
1497 	 * 	\param array   the input  double complex array
1498 	 * 	\param iresult the output row    indices array
1499 	 * 	\param jresult the output column indices array
1500 	 * 	\param result  the output values array
1501 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
1502 	 *	criteria :
1503 	 *
1504 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1505 	 * 	i.e.: C (row major) ordering
1506 	 */
1507 
1508 	rsb_nnz_idx_t middle;
1509 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1510 
1511 	rsb_coo_idx_t * ileft  ;
1512 	rsb_coo_idx_t * iright ;
1513 	rsb_coo_idx_t * jleft  ;
1514 	rsb_coo_idx_t * jright ;
1515 	size_t tn=0;
1516 	double complex * left  ;
1517 	double complex * right ;
1518 
1519 #define LIMIT 1
1520 	if(length==LIMIT)
1521 	{
1522 		*iresult = *iarray;
1523 		*jresult = *jarray;
1524 		*(double complex*)result = *(double complex*)array;
1525 	}
1526 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1527 #undef LIMIT
1528 	middle = length/2;
1529 
1530 	left  = array;
1531 	right  = array+middle;
1532 	ileft  = iarray;
1533 	jleft  = jarray;
1534 	iright = iarray+middle;
1535 	jright = jarray+middle;
1536 
1537 /* 20121016 commented out omp usage because broke serial compilation  */
1538 	{
1539 	rsb_do_mergesort_double_complex_BCSR
1540 	( ileft, jleft,
1541  mb, kb, 		left,   middle,
1542 	        iresult  ,       jresult,
1543 		result         );
1544 
1545 	if(tn==1)
1546 	rsb_do_mergesort_double_complex_BCSR
1547 	(iright, jright,
1548  mb, kb, 		right, length-middle,  iresult+middle  ,jresult+middle,
1549 	((double complex*)result)+middle  );
1550 	}
1551 
1552 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
1553 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
1554 	RSB_MEMCPY(  left, result       ,sizeof(double complex)*middle);
1555 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1556 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1557 	RSB_MEMCPY( right, ((double complex*)result)+middle ,sizeof(double complex)*(length-middle));
1558 
1559 	rsb_do_merge_double_complex_BCSR		(
1560 			ileft,iright,iresult,
1561 			jleft,jright,jresult,
1562 	mb,kb,
1563 			left, right, result,
1564 			middle,length-middle
1565 			);
1566 	return RSB_ERR_NO_ERROR;	/* ! */
1567 }
1568 
1569 
1570 
rsb_do_merge_double_complex_BCSR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t mb,const rsb_coo_idx_t kb,const double complex * left,const double complex * restrict right,double complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1571 void rsb_do_merge_double_complex_BCSR(
1572 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
1573 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
1574 const rsb_coo_idx_t mb, const rsb_coo_idx_t kb,		const double complex* left, const double complex* restrict right,  double complex* restrict result,
1575 		rsb_nnz_idx_t left_length,
1576 		rsb_nnz_idx_t right_length )
1577 
1578 
1579 {
1580 	/*!
1581 	 * \ingroup gr_util
1582 	 * The merge function for our BCSR matrix coefficients sorting.
1583 	 *
1584 	 * NOTE :This function is the mergesort bottleneck.
1585 	 */
1586 	register int left_index=0, right_index=0, result_index=0;
1587 
1588 	/*
1589 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1590 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1591 	          |<-  length ----->|
1592 	          ^- index
1593 	 */
1594 
1595 
1596 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
1597 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
1598 #define RESULT_ADVANCE		result_index =( result_index+1);
1599 
1600 #define RESULT_APPEND(IEL,JEL,EL)	\
1601 	iresult[result_index]=(IEL);  \
1602 	jresult[result_index]=(JEL);  \
1603 	result[result_index]=( EL);  \
1604 	RESULT_ADVANCE;
1605 
1606 #define LRESULT_APPEND	\
1607 	iresult[result_index]=ileft[left_index];\
1608 	jresult[result_index]=jleft[left_index];\
1609 	result[ result_index]= left[left_index];\
1610 	RESULT_ADVANCE; \
1611 	LEFT_ADVANCE;
1612 
1613 #define RRESULT_APPEND	\
1614 	iresult[result_index]=iright[right_index];\
1615 	jresult[result_index]=jright[right_index];\
1616 	 result[result_index]= right[right_index];\
1617 	RESULT_ADVANCE; \
1618 	RIGHT_ADVANCE;
1619 
1620 	while( left_length > 0 && right_length > 0)
1621 	if(
1622 
1623 		ileft[left_index]/mb < iright[right_index]/mb ||
1624 		(	ileft[left_index]/mb == iright[right_index]/mb	&&
1625 			jleft[left_index]/kb <= jright[right_index]/kb	)
1626 		)
1627 			{
1628 		LRESULT_APPEND
1629 	}
1630 	else
1631 	{
1632 		RRESULT_APPEND
1633 	}
1634 
1635 	while( left_length  > 0 )
1636 	{
1637 		LRESULT_APPEND
1638 	}
1639 	while( right_length  > 0 )
1640 	{
1641 		RRESULT_APPEND
1642 	}
1643 #undef LEFT_ADVANCE
1644 #undef RIGHT_ADVANCE
1645 #undef RESULT_ADVANCE
1646 #undef RESULT_APPEND
1647 #undef LRESULT_APPEND
1648 #undef RRESULT_APPEND
1649 
1650 }
1651 
rsb_do_mergesort_double_VBR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t * restrict biarray,rsb_coo_idx_t * restrict bjarray,double * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,rsb_coo_idx_t * restrict biresult,rsb_coo_idx_t * restrict bjresult,double * restrict result)1652 rsb_err_t rsb_do_mergesort_double_VBR(
1653 	rsb_coo_idx_t *restrict iarray,
1654 	rsb_coo_idx_t *restrict jarray,
1655 	rsb_coo_idx_t *restrict biarray,
1656 	rsb_coo_idx_t *restrict bjarray,
1657 	double *array,
1658 	rsb_nnz_idx_t length,
1659 	rsb_coo_idx_t *restrict iresult,
1660 	rsb_coo_idx_t *restrict jresult,
1661 	rsb_coo_idx_t *restrict biresult,
1662 	rsb_coo_idx_t *restrict bjresult,
1663 	double *restrict result)
1664 
1665 {
1666 	/*!
1667 	 *	\ingroup gr_util
1668 	 *	This function will sort the non zero elements of a sparse blocked
1669 	 *      double matrix.
1670 	 *      It will read row and column indices arrays, the values array,
1671 	 *	and will sort them in separate output arrays.
1672 	 *
1673 	 *	NOTE : This function could be optimized.
1674 	 *
1675 	 * 	\param iarray  the input  row    indices array
1676 	 * 	\param jarray  the input  column indices array
1677 	 * 	\param array   the input  double array
1678 	 * 	\param iresult the output row    indices array
1679 	 * 	\param jresult the output column indices array
1680 	 * 	\param result  the output values array
1681 	 * 	\param biarray  the input  block row    indices array
1682 	 * 	\param bjarray  the input  block column indices array
1683 	 * 	\param biresult the output block row    indices array
1684 	 * 	\param bjresult the output block column indices array
1685 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
1686 	 *	criteria :
1687 	 *
1688 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1689 	 * 	i.e.: C (row major) ordering
1690 	 */
1691 
1692 	rsb_nnz_idx_t middle;
1693 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1694 
1695 	rsb_coo_idx_t * ileft  ;
1696 	rsb_coo_idx_t * iright ;
1697 
1698 	rsb_coo_idx_t * bileft  ;
1699 	rsb_coo_idx_t * biright ;
1700 	rsb_coo_idx_t * jleft  ;
1701 	rsb_coo_idx_t * jright ;
1702 
1703 	rsb_coo_idx_t * bjleft  ;
1704 	rsb_coo_idx_t * bjright ;
1705 	size_t tn=0;
1706 	double * left  ;
1707 	double * right ;
1708 
1709 #define LIMIT 1
1710 	if(length==LIMIT)
1711 	{
1712 		*iresult = *iarray;
1713 		*jresult = *jarray;
1714 
1715 		*biresult = *biarray;
1716 		*bjresult = *bjarray;
1717 		*(double*)result = *(double*)array;
1718 	}
1719 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1720 #undef LIMIT
1721 	middle = length/2;
1722 
1723 
1724 	bileft  = biarray;
1725 	bjleft  = bjarray;
1726 	biright = biarray+middle;
1727 	bjright = bjarray+middle;
1728 	left  = array;
1729 	right  = array+middle;
1730 	ileft  = iarray;
1731 	jleft  = jarray;
1732 	iright = iarray+middle;
1733 	jright = jarray+middle;
1734 
1735 /* 20121016 commented out omp usage because broke serial compilation  */
1736 	{
1737 	rsb_do_mergesort_double_VBR
1738 	( ileft, jleft,
1739 		bileft, bjleft,
1740 		left,   middle,
1741 	        iresult  ,       jresult,
1742 		biresult, bjresult,
1743 		result         );
1744 
1745 	if(tn==1)
1746 	rsb_do_mergesort_double_VBR
1747 	(iright, jright,
1748 		biright, bjright,
1749 		right, length-middle,  iresult+middle  ,jresult+middle,
1750 		biresult+middle, bjresult+middle,
1751 	((double*)result)+middle  );
1752 	}
1753 
1754 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
1755 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
1756 
1757 	RSB_MEMCPY(bileft ,biresult       ,so*middle);
1758 	RSB_MEMCPY(bjleft ,bjresult       ,so*middle);
1759 	RSB_MEMCPY(  left, result       ,sizeof(double)*middle);
1760 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1761 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1762 
1763 	RSB_MEMCPY(biright ,biresult+middle       ,so*(length-middle));
1764 	RSB_MEMCPY(bjright ,bjresult+middle       ,so*(length-middle));
1765 	RSB_MEMCPY( right, ((double*)result)+middle ,sizeof(double)*(length-middle));
1766 
1767 	rsb_do_merge_double_VBR		(
1768 			ileft,iright,iresult,
1769 			jleft,jright,jresult,
1770 
1771 			bileft, biright, biresult,
1772 			bjleft, bjright, bjresult,
1773 			left, right, result,
1774 			middle,length-middle
1775 			);
1776 	return RSB_ERR_NO_ERROR;	/* ! */
1777 }
1778 
1779 
1780 
rsb_do_merge_double_VBR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t * restrict bileft,const rsb_coo_idx_t * restrict biright,rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft,const rsb_coo_idx_t * restrict bjright,rsb_coo_idx_t * restrict bjresult,const double * left,const double * restrict right,double * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1781 void rsb_do_merge_double_VBR(
1782 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
1783 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
1784 const rsb_coo_idx_t * restrict bileft, const rsb_coo_idx_t * restrict biright, rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft, const rsb_coo_idx_t * restrict bjright, rsb_coo_idx_t * restrict bjresult,		const double* left, const double* restrict right,  double* restrict result,
1785 		rsb_nnz_idx_t left_length,
1786 		rsb_nnz_idx_t right_length )
1787 
1788 
1789 {
1790 	/*!
1791 	 * \ingroup gr_util
1792 	 * The merge function for our VBR matrix coefficients sorting.
1793 	 *
1794 	 * NOTE :This function is the mergesort bottleneck.
1795 	 */
1796 	register int left_index=0, right_index=0, result_index=0;
1797 
1798 	/*
1799 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1800 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1801 	          |<-  length ----->|
1802 	          ^- index
1803 	 */
1804 
1805 
1806 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
1807 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
1808 #define RESULT_ADVANCE		result_index =( result_index+1);
1809 
1810 #define RESULT_APPEND(IEL,JEL,BIEL,BJEL,EL)	\
1811 	iresult[result_index]=(IEL);  \
1812 	jresult[result_index]=(JEL);  \
1813 	result[result_index]=( EL);  \
1814 	biresult[result_index]=(BIEL);  \
1815 	bjresult[result_index]=(BJEL);  \
1816 	RESULT_ADVANCE;
1817 
1818 #define LRESULT_APPEND	\
1819 	iresult[result_index]=ileft[left_index];\
1820 	jresult[result_index]=jleft[left_index];\
1821 	biresult[result_index]=bileft[left_index];  \
1822 	bjresult[result_index]=bjleft[left_index];  \
1823 	result[ result_index]= left[left_index];\
1824 	RESULT_ADVANCE; \
1825 	LEFT_ADVANCE;
1826 
1827 #define RRESULT_APPEND	\
1828 	iresult[result_index]=iright[right_index];\
1829 	jresult[result_index]=jright[right_index];\
1830 	biresult[result_index]=biright[right_index];  \
1831 	bjresult[result_index]=bjright[right_index];  \
1832 	 result[result_index]= right[right_index];\
1833 	RESULT_ADVANCE; \
1834 	RIGHT_ADVANCE;
1835 
1836 	while( left_length > 0 && right_length > 0)
1837 	if(
1838 
1839 		bileft[left_index] < biright[right_index] ||
1840 		(	bileft[left_index] == biright[right_index]	&&
1841 			bjleft[left_index] <= bjright[right_index]	)
1842 		)
1843 					{
1844 		LRESULT_APPEND
1845 	}
1846 	else
1847 	{
1848 		RRESULT_APPEND
1849 	}
1850 
1851 	while( left_length  > 0 )
1852 	{
1853 		LRESULT_APPEND
1854 	}
1855 	while( right_length  > 0 )
1856 	{
1857 		RRESULT_APPEND
1858 	}
1859 #undef LEFT_ADVANCE
1860 #undef RIGHT_ADVANCE
1861 #undef RESULT_ADVANCE
1862 #undef RESULT_APPEND
1863 #undef LRESULT_APPEND
1864 #undef RRESULT_APPEND
1865 
1866 }
1867 
rsb_do_mergesort_float_VBR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t * restrict biarray,rsb_coo_idx_t * restrict bjarray,float * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,rsb_coo_idx_t * restrict biresult,rsb_coo_idx_t * restrict bjresult,float * restrict result)1868 rsb_err_t rsb_do_mergesort_float_VBR(
1869 	rsb_coo_idx_t *restrict iarray,
1870 	rsb_coo_idx_t *restrict jarray,
1871 	rsb_coo_idx_t *restrict biarray,
1872 	rsb_coo_idx_t *restrict bjarray,
1873 	float *array,
1874 	rsb_nnz_idx_t length,
1875 	rsb_coo_idx_t *restrict iresult,
1876 	rsb_coo_idx_t *restrict jresult,
1877 	rsb_coo_idx_t *restrict biresult,
1878 	rsb_coo_idx_t *restrict bjresult,
1879 	float *restrict result)
1880 
1881 {
1882 	/*!
1883 	 *	\ingroup gr_util
1884 	 *	This function will sort the non zero elements of a sparse blocked
1885 	 *      float matrix.
1886 	 *      It will read row and column indices arrays, the values array,
1887 	 *	and will sort them in separate output arrays.
1888 	 *
1889 	 *	NOTE : This function could be optimized.
1890 	 *
1891 	 * 	\param iarray  the input  row    indices array
1892 	 * 	\param jarray  the input  column indices array
1893 	 * 	\param array   the input  float array
1894 	 * 	\param iresult the output row    indices array
1895 	 * 	\param jresult the output column indices array
1896 	 * 	\param result  the output values array
1897 	 * 	\param biarray  the input  block row    indices array
1898 	 * 	\param bjarray  the input  block column indices array
1899 	 * 	\param biresult the output block row    indices array
1900 	 * 	\param bjresult the output block column indices array
1901 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
1902 	 *	criteria :
1903 	 *
1904 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
1905 	 * 	i.e.: C (row major) ordering
1906 	 */
1907 
1908 	rsb_nnz_idx_t middle;
1909 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
1910 
1911 	rsb_coo_idx_t * ileft  ;
1912 	rsb_coo_idx_t * iright ;
1913 
1914 	rsb_coo_idx_t * bileft  ;
1915 	rsb_coo_idx_t * biright ;
1916 	rsb_coo_idx_t * jleft  ;
1917 	rsb_coo_idx_t * jright ;
1918 
1919 	rsb_coo_idx_t * bjleft  ;
1920 	rsb_coo_idx_t * bjright ;
1921 	size_t tn=0;
1922 	float * left  ;
1923 	float * right ;
1924 
1925 #define LIMIT 1
1926 	if(length==LIMIT)
1927 	{
1928 		*iresult = *iarray;
1929 		*jresult = *jarray;
1930 
1931 		*biresult = *biarray;
1932 		*bjresult = *bjarray;
1933 		*(float*)result = *(float*)array;
1934 	}
1935 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
1936 #undef LIMIT
1937 	middle = length/2;
1938 
1939 
1940 	bileft  = biarray;
1941 	bjleft  = bjarray;
1942 	biright = biarray+middle;
1943 	bjright = bjarray+middle;
1944 	left  = array;
1945 	right  = array+middle;
1946 	ileft  = iarray;
1947 	jleft  = jarray;
1948 	iright = iarray+middle;
1949 	jright = jarray+middle;
1950 
1951 /* 20121016 commented out omp usage because broke serial compilation  */
1952 	{
1953 	rsb_do_mergesort_float_VBR
1954 	( ileft, jleft,
1955 		bileft, bjleft,
1956 		left,   middle,
1957 	        iresult  ,       jresult,
1958 		biresult, bjresult,
1959 		result         );
1960 
1961 	if(tn==1)
1962 	rsb_do_mergesort_float_VBR
1963 	(iright, jright,
1964 		biright, bjright,
1965 		right, length-middle,  iresult+middle  ,jresult+middle,
1966 		biresult+middle, bjresult+middle,
1967 	((float*)result)+middle  );
1968 	}
1969 
1970 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
1971 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
1972 
1973 	RSB_MEMCPY(bileft ,biresult       ,so*middle);
1974 	RSB_MEMCPY(bjleft ,bjresult       ,so*middle);
1975 	RSB_MEMCPY(  left, result       ,sizeof(float)*middle);
1976 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
1977 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
1978 
1979 	RSB_MEMCPY(biright ,biresult+middle       ,so*(length-middle));
1980 	RSB_MEMCPY(bjright ,bjresult+middle       ,so*(length-middle));
1981 	RSB_MEMCPY( right, ((float*)result)+middle ,sizeof(float)*(length-middle));
1982 
1983 	rsb_do_merge_float_VBR		(
1984 			ileft,iright,iresult,
1985 			jleft,jright,jresult,
1986 
1987 			bileft, biright, biresult,
1988 			bjleft, bjright, bjresult,
1989 			left, right, result,
1990 			middle,length-middle
1991 			);
1992 	return RSB_ERR_NO_ERROR;	/* ! */
1993 }
1994 
1995 
1996 
rsb_do_merge_float_VBR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t * restrict bileft,const rsb_coo_idx_t * restrict biright,rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft,const rsb_coo_idx_t * restrict bjright,rsb_coo_idx_t * restrict bjresult,const float * left,const float * restrict right,float * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)1997 void rsb_do_merge_float_VBR(
1998 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
1999 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
2000 const rsb_coo_idx_t * restrict bileft, const rsb_coo_idx_t * restrict biright, rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft, const rsb_coo_idx_t * restrict bjright, rsb_coo_idx_t * restrict bjresult,		const float* left, const float* restrict right,  float* restrict result,
2001 		rsb_nnz_idx_t left_length,
2002 		rsb_nnz_idx_t right_length )
2003 
2004 
2005 {
2006 	/*!
2007 	 * \ingroup gr_util
2008 	 * The merge function for our VBR matrix coefficients sorting.
2009 	 *
2010 	 * NOTE :This function is the mergesort bottleneck.
2011 	 */
2012 	register int left_index=0, right_index=0, result_index=0;
2013 
2014 	/*
2015 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2016 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2017 	          |<-  length ----->|
2018 	          ^- index
2019 	 */
2020 
2021 
2022 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
2023 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
2024 #define RESULT_ADVANCE		result_index =( result_index+1);
2025 
2026 #define RESULT_APPEND(IEL,JEL,BIEL,BJEL,EL)	\
2027 	iresult[result_index]=(IEL);  \
2028 	jresult[result_index]=(JEL);  \
2029 	result[result_index]=( EL);  \
2030 	biresult[result_index]=(BIEL);  \
2031 	bjresult[result_index]=(BJEL);  \
2032 	RESULT_ADVANCE;
2033 
2034 #define LRESULT_APPEND	\
2035 	iresult[result_index]=ileft[left_index];\
2036 	jresult[result_index]=jleft[left_index];\
2037 	biresult[result_index]=bileft[left_index];  \
2038 	bjresult[result_index]=bjleft[left_index];  \
2039 	result[ result_index]= left[left_index];\
2040 	RESULT_ADVANCE; \
2041 	LEFT_ADVANCE;
2042 
2043 #define RRESULT_APPEND	\
2044 	iresult[result_index]=iright[right_index];\
2045 	jresult[result_index]=jright[right_index];\
2046 	biresult[result_index]=biright[right_index];  \
2047 	bjresult[result_index]=bjright[right_index];  \
2048 	 result[result_index]= right[right_index];\
2049 	RESULT_ADVANCE; \
2050 	RIGHT_ADVANCE;
2051 
2052 	while( left_length > 0 && right_length > 0)
2053 	if(
2054 
2055 		bileft[left_index] < biright[right_index] ||
2056 		(	bileft[left_index] == biright[right_index]	&&
2057 			bjleft[left_index] <= bjright[right_index]	)
2058 		)
2059 					{
2060 		LRESULT_APPEND
2061 	}
2062 	else
2063 	{
2064 		RRESULT_APPEND
2065 	}
2066 
2067 	while( left_length  > 0 )
2068 	{
2069 		LRESULT_APPEND
2070 	}
2071 	while( right_length  > 0 )
2072 	{
2073 		RRESULT_APPEND
2074 	}
2075 #undef LEFT_ADVANCE
2076 #undef RIGHT_ADVANCE
2077 #undef RESULT_ADVANCE
2078 #undef RESULT_APPEND
2079 #undef LRESULT_APPEND
2080 #undef RRESULT_APPEND
2081 
2082 }
2083 
rsb_do_mergesort_float_complex_VBR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t * restrict biarray,rsb_coo_idx_t * restrict bjarray,float complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,rsb_coo_idx_t * restrict biresult,rsb_coo_idx_t * restrict bjresult,float complex * restrict result)2084 rsb_err_t rsb_do_mergesort_float_complex_VBR(
2085 	rsb_coo_idx_t *restrict iarray,
2086 	rsb_coo_idx_t *restrict jarray,
2087 	rsb_coo_idx_t *restrict biarray,
2088 	rsb_coo_idx_t *restrict bjarray,
2089 	float complex *array,
2090 	rsb_nnz_idx_t length,
2091 	rsb_coo_idx_t *restrict iresult,
2092 	rsb_coo_idx_t *restrict jresult,
2093 	rsb_coo_idx_t *restrict biresult,
2094 	rsb_coo_idx_t *restrict bjresult,
2095 	float complex *restrict result)
2096 
2097 {
2098 	/*!
2099 	 *	\ingroup gr_util
2100 	 *	This function will sort the non zero elements of a sparse blocked
2101 	 *      float complex matrix.
2102 	 *      It will read row and column indices arrays, the values array,
2103 	 *	and will sort them in separate output arrays.
2104 	 *
2105 	 *	NOTE : This function could be optimized.
2106 	 *
2107 	 * 	\param iarray  the input  row    indices array
2108 	 * 	\param jarray  the input  column indices array
2109 	 * 	\param array   the input  float complex array
2110 	 * 	\param iresult the output row    indices array
2111 	 * 	\param jresult the output column indices array
2112 	 * 	\param result  the output values array
2113 	 * 	\param biarray  the input  block row    indices array
2114 	 * 	\param bjarray  the input  block column indices array
2115 	 * 	\param biresult the output block row    indices array
2116 	 * 	\param bjresult the output block column indices array
2117 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
2118 	 *	criteria :
2119 	 *
2120 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
2121 	 * 	i.e.: C (row major) ordering
2122 	 */
2123 
2124 	rsb_nnz_idx_t middle;
2125 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
2126 
2127 	rsb_coo_idx_t * ileft  ;
2128 	rsb_coo_idx_t * iright ;
2129 
2130 	rsb_coo_idx_t * bileft  ;
2131 	rsb_coo_idx_t * biright ;
2132 	rsb_coo_idx_t * jleft  ;
2133 	rsb_coo_idx_t * jright ;
2134 
2135 	rsb_coo_idx_t * bjleft  ;
2136 	rsb_coo_idx_t * bjright ;
2137 	size_t tn=0;
2138 	float complex * left  ;
2139 	float complex * right ;
2140 
2141 #define LIMIT 1
2142 	if(length==LIMIT)
2143 	{
2144 		*iresult = *iarray;
2145 		*jresult = *jarray;
2146 
2147 		*biresult = *biarray;
2148 		*bjresult = *bjarray;
2149 		*(float complex*)result = *(float complex*)array;
2150 	}
2151 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
2152 #undef LIMIT
2153 	middle = length/2;
2154 
2155 
2156 	bileft  = biarray;
2157 	bjleft  = bjarray;
2158 	biright = biarray+middle;
2159 	bjright = bjarray+middle;
2160 	left  = array;
2161 	right  = array+middle;
2162 	ileft  = iarray;
2163 	jleft  = jarray;
2164 	iright = iarray+middle;
2165 	jright = jarray+middle;
2166 
2167 /* 20121016 commented out omp usage because broke serial compilation  */
2168 	{
2169 	rsb_do_mergesort_float_complex_VBR
2170 	( ileft, jleft,
2171 		bileft, bjleft,
2172 		left,   middle,
2173 	        iresult  ,       jresult,
2174 		biresult, bjresult,
2175 		result         );
2176 
2177 	if(tn==1)
2178 	rsb_do_mergesort_float_complex_VBR
2179 	(iright, jright,
2180 		biright, bjright,
2181 		right, length-middle,  iresult+middle  ,jresult+middle,
2182 		biresult+middle, bjresult+middle,
2183 	((float complex*)result)+middle  );
2184 	}
2185 
2186 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
2187 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
2188 
2189 	RSB_MEMCPY(bileft ,biresult       ,so*middle);
2190 	RSB_MEMCPY(bjleft ,bjresult       ,so*middle);
2191 	RSB_MEMCPY(  left, result       ,sizeof(float complex)*middle);
2192 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
2193 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
2194 
2195 	RSB_MEMCPY(biright ,biresult+middle       ,so*(length-middle));
2196 	RSB_MEMCPY(bjright ,bjresult+middle       ,so*(length-middle));
2197 	RSB_MEMCPY( right, ((float complex*)result)+middle ,sizeof(float complex)*(length-middle));
2198 
2199 	rsb_do_merge_float_complex_VBR		(
2200 			ileft,iright,iresult,
2201 			jleft,jright,jresult,
2202 
2203 			bileft, biright, biresult,
2204 			bjleft, bjright, bjresult,
2205 			left, right, result,
2206 			middle,length-middle
2207 			);
2208 	return RSB_ERR_NO_ERROR;	/* ! */
2209 }
2210 
2211 
2212 
rsb_do_merge_float_complex_VBR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t * restrict bileft,const rsb_coo_idx_t * restrict biright,rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft,const rsb_coo_idx_t * restrict bjright,rsb_coo_idx_t * restrict bjresult,const float complex * left,const float complex * restrict right,float complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)2213 void rsb_do_merge_float_complex_VBR(
2214 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
2215 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
2216 const rsb_coo_idx_t * restrict bileft, const rsb_coo_idx_t * restrict biright, rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft, const rsb_coo_idx_t * restrict bjright, rsb_coo_idx_t * restrict bjresult,		const float complex* left, const float complex* restrict right,  float complex* restrict result,
2217 		rsb_nnz_idx_t left_length,
2218 		rsb_nnz_idx_t right_length )
2219 
2220 
2221 {
2222 	/*!
2223 	 * \ingroup gr_util
2224 	 * The merge function for our VBR matrix coefficients sorting.
2225 	 *
2226 	 * NOTE :This function is the mergesort bottleneck.
2227 	 */
2228 	register int left_index=0, right_index=0, result_index=0;
2229 
2230 	/*
2231 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2232 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2233 	          |<-  length ----->|
2234 	          ^- index
2235 	 */
2236 
2237 
2238 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
2239 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
2240 #define RESULT_ADVANCE		result_index =( result_index+1);
2241 
2242 #define RESULT_APPEND(IEL,JEL,BIEL,BJEL,EL)	\
2243 	iresult[result_index]=(IEL);  \
2244 	jresult[result_index]=(JEL);  \
2245 	result[result_index]=( EL);  \
2246 	biresult[result_index]=(BIEL);  \
2247 	bjresult[result_index]=(BJEL);  \
2248 	RESULT_ADVANCE;
2249 
2250 #define LRESULT_APPEND	\
2251 	iresult[result_index]=ileft[left_index];\
2252 	jresult[result_index]=jleft[left_index];\
2253 	biresult[result_index]=bileft[left_index];  \
2254 	bjresult[result_index]=bjleft[left_index];  \
2255 	result[ result_index]= left[left_index];\
2256 	RESULT_ADVANCE; \
2257 	LEFT_ADVANCE;
2258 
2259 #define RRESULT_APPEND	\
2260 	iresult[result_index]=iright[right_index];\
2261 	jresult[result_index]=jright[right_index];\
2262 	biresult[result_index]=biright[right_index];  \
2263 	bjresult[result_index]=bjright[right_index];  \
2264 	 result[result_index]= right[right_index];\
2265 	RESULT_ADVANCE; \
2266 	RIGHT_ADVANCE;
2267 
2268 	while( left_length > 0 && right_length > 0)
2269 	if(
2270 
2271 		bileft[left_index] < biright[right_index] ||
2272 		(	bileft[left_index] == biright[right_index]	&&
2273 			bjleft[left_index] <= bjright[right_index]	)
2274 		)
2275 					{
2276 		LRESULT_APPEND
2277 	}
2278 	else
2279 	{
2280 		RRESULT_APPEND
2281 	}
2282 
2283 	while( left_length  > 0 )
2284 	{
2285 		LRESULT_APPEND
2286 	}
2287 	while( right_length  > 0 )
2288 	{
2289 		RRESULT_APPEND
2290 	}
2291 #undef LEFT_ADVANCE
2292 #undef RIGHT_ADVANCE
2293 #undef RESULT_ADVANCE
2294 #undef RESULT_APPEND
2295 #undef LRESULT_APPEND
2296 #undef RRESULT_APPEND
2297 
2298 }
2299 
rsb_do_mergesort_double_complex_VBR(rsb_coo_idx_t * restrict iarray,rsb_coo_idx_t * restrict jarray,rsb_coo_idx_t * restrict biarray,rsb_coo_idx_t * restrict bjarray,double complex * array,rsb_nnz_idx_t length,rsb_coo_idx_t * restrict iresult,rsb_coo_idx_t * restrict jresult,rsb_coo_idx_t * restrict biresult,rsb_coo_idx_t * restrict bjresult,double complex * restrict result)2300 rsb_err_t rsb_do_mergesort_double_complex_VBR(
2301 	rsb_coo_idx_t *restrict iarray,
2302 	rsb_coo_idx_t *restrict jarray,
2303 	rsb_coo_idx_t *restrict biarray,
2304 	rsb_coo_idx_t *restrict bjarray,
2305 	double complex *array,
2306 	rsb_nnz_idx_t length,
2307 	rsb_coo_idx_t *restrict iresult,
2308 	rsb_coo_idx_t *restrict jresult,
2309 	rsb_coo_idx_t *restrict biresult,
2310 	rsb_coo_idx_t *restrict bjresult,
2311 	double complex *restrict result)
2312 
2313 {
2314 	/*!
2315 	 *	\ingroup gr_util
2316 	 *	This function will sort the non zero elements of a sparse blocked
2317 	 *      double complex matrix.
2318 	 *      It will read row and column indices arrays, the values array,
2319 	 *	and will sort them in separate output arrays.
2320 	 *
2321 	 *	NOTE : This function could be optimized.
2322 	 *
2323 	 * 	\param iarray  the input  row    indices array
2324 	 * 	\param jarray  the input  column indices array
2325 	 * 	\param array   the input  double complex array
2326 	 * 	\param iresult the output row    indices array
2327 	 * 	\param jresult the output column indices array
2328 	 * 	\param result  the output values array
2329 	 * 	\param biarray  the input  block row    indices array
2330 	 * 	\param bjarray  the input  block column indices array
2331 	 * 	\param biresult the output block row    indices array
2332 	 * 	\param bjresult the output block column indices array
2333 	 *	Will sort  thethree arrays (iarray, jarray, array) following the
2334 	 *	criteria :
2335 	 *
2336 	 * 	(ia1,ja1)<=(ia2,ja2) iff (ia1<ia2) or ( (ia1==ia2) and (ja1<ja2) )
2337 	 * 	i.e.: C (row major) ordering
2338 	 */
2339 
2340 	rsb_nnz_idx_t middle;
2341 	rsb_coo_idx_t so=sizeof(rsb_coo_idx_t);
2342 
2343 	rsb_coo_idx_t * ileft  ;
2344 	rsb_coo_idx_t * iright ;
2345 
2346 	rsb_coo_idx_t * bileft  ;
2347 	rsb_coo_idx_t * biright ;
2348 	rsb_coo_idx_t * jleft  ;
2349 	rsb_coo_idx_t * jright ;
2350 
2351 	rsb_coo_idx_t * bjleft  ;
2352 	rsb_coo_idx_t * bjright ;
2353 	size_t tn=0;
2354 	double complex * left  ;
2355 	double complex * right ;
2356 
2357 #define LIMIT 1
2358 	if(length==LIMIT)
2359 	{
2360 		*iresult = *iarray;
2361 		*jresult = *jarray;
2362 
2363 		*biresult = *biarray;
2364 		*bjresult = *bjarray;
2365 		*(double complex*)result = *(double complex*)array;
2366 	}
2367 	if(length<=LIMIT) return RSB_ERR_NO_ERROR;
2368 #undef LIMIT
2369 	middle = length/2;
2370 
2371 
2372 	bileft  = biarray;
2373 	bjleft  = bjarray;
2374 	biright = biarray+middle;
2375 	bjright = bjarray+middle;
2376 	left  = array;
2377 	right  = array+middle;
2378 	ileft  = iarray;
2379 	jleft  = jarray;
2380 	iright = iarray+middle;
2381 	jright = jarray+middle;
2382 
2383 /* 20121016 commented out omp usage because broke serial compilation  */
2384 	{
2385 	rsb_do_mergesort_double_complex_VBR
2386 	( ileft, jleft,
2387 		bileft, bjleft,
2388 		left,   middle,
2389 	        iresult  ,       jresult,
2390 		biresult, bjresult,
2391 		result         );
2392 
2393 	if(tn==1)
2394 	rsb_do_mergesort_double_complex_VBR
2395 	(iright, jright,
2396 		biright, bjright,
2397 		right, length-middle,  iresult+middle  ,jresult+middle,
2398 		biresult+middle, bjresult+middle,
2399 	((double complex*)result)+middle  );
2400 	}
2401 
2402 	RSB_MEMCPY(ileft ,iresult       ,so*middle);
2403 	RSB_MEMCPY(jleft ,jresult       ,so*middle);
2404 
2405 	RSB_MEMCPY(bileft ,biresult       ,so*middle);
2406 	RSB_MEMCPY(bjleft ,bjresult       ,so*middle);
2407 	RSB_MEMCPY(  left, result       ,sizeof(double complex)*middle);
2408 	RSB_MEMCPY(iright,iresult+middle,so*(length-middle));
2409 	RSB_MEMCPY(jright,jresult+middle,so*(length-middle));
2410 
2411 	RSB_MEMCPY(biright ,biresult+middle       ,so*(length-middle));
2412 	RSB_MEMCPY(bjright ,bjresult+middle       ,so*(length-middle));
2413 	RSB_MEMCPY( right, ((double complex*)result)+middle ,sizeof(double complex)*(length-middle));
2414 
2415 	rsb_do_merge_double_complex_VBR		(
2416 			ileft,iright,iresult,
2417 			jleft,jright,jresult,
2418 
2419 			bileft, biright, biresult,
2420 			bjleft, bjright, bjresult,
2421 			left, right, result,
2422 			middle,length-middle
2423 			);
2424 	return RSB_ERR_NO_ERROR;	/* ! */
2425 }
2426 
2427 
2428 
rsb_do_merge_double_complex_VBR(const rsb_coo_idx_t * restrict ileft,const rsb_coo_idx_t * restrict iright,rsb_coo_idx_t * restrict iresult,const rsb_coo_idx_t * restrict jleft,const rsb_coo_idx_t * restrict jright,rsb_coo_idx_t * restrict jresult,const rsb_coo_idx_t * restrict bileft,const rsb_coo_idx_t * restrict biright,rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft,const rsb_coo_idx_t * restrict bjright,rsb_coo_idx_t * restrict bjresult,const double complex * left,const double complex * restrict right,double complex * restrict result,rsb_nnz_idx_t left_length,rsb_nnz_idx_t right_length)2429 void rsb_do_merge_double_complex_VBR(
2430 		const rsb_coo_idx_t* restrict ileft, const rsb_coo_idx_t* restrict iright,  rsb_coo_idx_t*restrict iresult,
2431 		const rsb_coo_idx_t* restrict jleft, const rsb_coo_idx_t* restrict jright,  rsb_coo_idx_t*restrict jresult,
2432 const rsb_coo_idx_t * restrict bileft, const rsb_coo_idx_t * restrict biright, rsb_coo_idx_t * restrict biresult,const rsb_coo_idx_t * restrict bjleft, const rsb_coo_idx_t * restrict bjright, rsb_coo_idx_t * restrict bjresult,		const double complex* left, const double complex* restrict right,  double complex* restrict result,
2433 		rsb_nnz_idx_t left_length,
2434 		rsb_nnz_idx_t right_length )
2435 
2436 
2437 {
2438 	/*!
2439 	 * \ingroup gr_util
2440 	 * The merge function for our VBR matrix coefficients sorting.
2441 	 *
2442 	 * NOTE :This function is the mergesort bottleneck.
2443 	 */
2444 	register int left_index=0, right_index=0, result_index=0;
2445 
2446 	/*
2447 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2448 	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2449 	          |<-  length ----->|
2450 	          ^- index
2451 	 */
2452 
2453 
2454 #define LEFT_ADVANCE		left_index =( left_index+1); left_length-- ;
2455 #define RIGHT_ADVANCE		right_index=(right_index+1); right_length--;
2456 #define RESULT_ADVANCE		result_index =( result_index+1);
2457 
2458 #define RESULT_APPEND(IEL,JEL,BIEL,BJEL,EL)	\
2459 	iresult[result_index]=(IEL);  \
2460 	jresult[result_index]=(JEL);  \
2461 	result[result_index]=( EL);  \
2462 	biresult[result_index]=(BIEL);  \
2463 	bjresult[result_index]=(BJEL);  \
2464 	RESULT_ADVANCE;
2465 
2466 #define LRESULT_APPEND	\
2467 	iresult[result_index]=ileft[left_index];\
2468 	jresult[result_index]=jleft[left_index];\
2469 	biresult[result_index]=bileft[left_index];  \
2470 	bjresult[result_index]=bjleft[left_index];  \
2471 	result[ result_index]= left[left_index];\
2472 	RESULT_ADVANCE; \
2473 	LEFT_ADVANCE;
2474 
2475 #define RRESULT_APPEND	\
2476 	iresult[result_index]=iright[right_index];\
2477 	jresult[result_index]=jright[right_index];\
2478 	biresult[result_index]=biright[right_index];  \
2479 	bjresult[result_index]=bjright[right_index];  \
2480 	 result[result_index]= right[right_index];\
2481 	RESULT_ADVANCE; \
2482 	RIGHT_ADVANCE;
2483 
2484 	while( left_length > 0 && right_length > 0)
2485 	if(
2486 
2487 		bileft[left_index] < biright[right_index] ||
2488 		(	bileft[left_index] == biright[right_index]	&&
2489 			bjleft[left_index] <= bjright[right_index]	)
2490 		)
2491 					{
2492 		LRESULT_APPEND
2493 	}
2494 	else
2495 	{
2496 		RRESULT_APPEND
2497 	}
2498 
2499 	while( left_length  > 0 )
2500 	{
2501 		LRESULT_APPEND
2502 	}
2503 	while( right_length  > 0 )
2504 	{
2505 		RRESULT_APPEND
2506 	}
2507 #undef LEFT_ADVANCE
2508 #undef RIGHT_ADVANCE
2509 #undef RESULT_ADVANCE
2510 #undef RESULT_APPEND
2511 #undef LRESULT_APPEND
2512 #undef RRESULT_APPEND
2513 
2514 }
2515 
2516 #ifdef __cplusplus
2517 }
2518 #endif  /* __cplusplus */
2519 
2520 /* @endcond */
2521