1 /* @cond INNERDOC */
2 
3 /**
4  * @file
5  * @brief
6  * Auxiliary functions.
7  */
8 
9 /*
10 
11 Copyright (C) 2008-2020 Michele Martone
12 
13 This file is part of librsb.
14 
15 librsb is free software; you can redistribute it and/or modify it
16 under the terms of the GNU Lesser General Public License as published
17 by the Free Software Foundation; either version 3 of the License, or
18 (at your option) any later version.
19 
20 librsb is distributed in the hope that it will be useful, but WITHOUT
21 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
23 License for more details.
24 
25 You should have received a copy of the GNU Lesser General Public
26 License along with librsb; see the file COPYING.
27 If not, see <http://www.gnu.org/licenses/>.
28 
29 */
30 /*
31  The code in this file was generated automatically by an M4 script.
32  It is not meant to be used as an API (Application Programming Interface).
33  p.s.: right now, only row major matrix access is considered.
34 
35  */
36 
37 
38 #ifdef __cplusplus
39 extern "C" {
40 #endif /* __cplusplus */
41 
42 #define RSB_WANT_OMP        1
43 #define RSB_MAX_OMP_THREADS 4
44 #include <omp.h>       /* OpenMP parallelism (EXPERIMENTAL) */
45 
46 
47 #include "rsb_common.h"
48 /* non blas-like functions */
49 
rsb__util_m4_sanity_check(void)50 rsb_err_t rsb__util_m4_sanity_check(void){
51 	/**
52 		There are bugs in the m4 macros or a bad m4 implementation which will trigger this test to fail.
53 		We are interested in catching them, as we should rely on a sane m4 environment.
54 	*/
55 	/* generated by the $0 macro */
56 
57 	if(
58 		0!=0 ||
59 		1!=1 ||
60 		1!=1 ||
61 		0!=0 ||
62 		0!=0 ||
63 		0!=0 ||
64 		0!=0 ||
65 		1!=1 ||
66 		0!=0 ||
67 		1!=1 ||
68 		1!=1 ||
69 		1!=1 ||
70 		0
71 		)
72 		goto err;
73 	return RSB_ERR_NO_ERROR;
74 err:
75 	return RSB_ERR_INTERNAL_ERROR;
76 }
77 
rsb__util_increase_by_one(void * p,rsb_nnz_idx_t n,rsb_flags_t typecode)78 const void * rsb__util_increase_by_one(void *p, rsb_nnz_idx_t n, rsb_flags_t typecode){
79 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
80 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  ) {(((double*)p)[n])+=1;return p;}
81 	else
82 #endif
83 #ifdef RSB_NUMERICAL_TYPE_FLOAT
84 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  ) {(((float*)p)[n])+=1;return p;}
85 	else
86 #endif
87 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
88 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  ) {(((float complex*)p)[n])+=1;return p;}
89 	else
90 #endif
91 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
92 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  ) {(((double complex*)p)[n])+=1;return p;}
93 	else
94 #endif
95 	return NULL;
96 }
97 
rsb__util_set_area_to_fraction_of_integer(void * p,const int alphai,rsb_flags_t typecode)98 void rsb__util_set_area_to_fraction_of_integer(void *p, const int alphai, rsb_flags_t typecode){
99 	/*
100 		alpha NULL will imply 1
101 	*/
102 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
103 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  ) {*(double*)p = 1;*(double*)p/=alphai;}
104 	else
105 #endif
106 #ifdef RSB_NUMERICAL_TYPE_FLOAT
107 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  ) {*(float*)p = 1;*(float*)p/=alphai;}
108 	else
109 #endif
110 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
111 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  ) {*(float complex*)p = 1;*(float complex*)p/=alphai;}
112 	else
113 #endif
114 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
115 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  ) {*(double complex*)p = 1;*(double complex*)p/=alphai;}
116 	else
117 #endif
118 	return;
119 }
120 
rsb__util_set_area_to_negated_fraction(void * p,const void * alpha,rsb_flags_t typecode)121 void rsb__util_set_area_to_negated_fraction(void *p, const void *alpha, rsb_flags_t typecode){
122 	/*
123 		alpha NULL will imply 1
124 	*/
125 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
126 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  ) {*(double*)p = -1;if(alpha)*(double*)p/=(*(double*)alpha);}
127 	else
128 #endif
129 #ifdef RSB_NUMERICAL_TYPE_FLOAT
130 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  ) {*(float*)p = -1;if(alpha)*(float*)p/=(*(float*)alpha);}
131 	else
132 #endif
133 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
134 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  ) {*(float complex*)p = -1;if(alpha)*(float complex*)p/=(*(float complex*)alpha);}
135 	else
136 #endif
137 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
138 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  ) {*(double complex*)p = -1;if(alpha)*(double complex*)p/=(*(double complex*)alpha);}
139 	else
140 #endif
141 	return;
142 }
143 
rsb__util_set_area_to_converted_integer(void * p,rsb_flags_t typecode,const rsb_int n)144 void rsb__util_set_area_to_converted_integer(void *p, rsb_flags_t typecode, const rsb_int n){
145 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
146 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  ) {*(double*)p = (double)n;}
147 	else
148 #endif
149 #ifdef RSB_NUMERICAL_TYPE_FLOAT
150 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  ) {*(float*)p = (float)n;}
151 	else
152 #endif
153 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
154 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  ) {*(float complex*)p = (float complex)n;}
155 	else
156 #endif
157 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
158 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  ) {*(double complex*)p = (double complex)n;}
159 	else
160 #endif
161 	return;
162 }
163 
rsb__util_get_partitioning_array(size_t bs,size_t X,rsb_blk_idx_t * X_b,rsb_flags_t flags)164 rsb_coo_idx_t * rsb__util_get_partitioning_array( size_t bs, size_t X , rsb_blk_idx_t * X_b, rsb_flags_t flags){
165 	/*!
166 	 * Given a block size (be it rows or columns), an element size X in bytes,
167 	 * and a dimension (rows or columns), returns an array containing the
168 	 * indices of the elements in each block.
169 	 *
170 	 * Therefore, the allocated arrays
171 	 *
172 	 * \param bs	the block size
173 	 * \param X	the rows or columns count
174 	 * \param X_b	on output, the allocated array elements count : (X+bs-1)/bs
175 	 * \return NULL on error;  a valid array pointer on success
176 	 *
177 	 * FIXME : why not size_t ? or maybe rsb_size_t ?
178 	 * */
179 	size_t i;
180 	rsb_err_t errval = RSB_ERR_NO_ERROR;
181 	rsb_coo_idx_t * p_x = NULL;
182 
183 	*X_b = (X+bs-1)/bs;
184 
185 	/* WARNING : 1 is the extreme limit before overflow :) */
186 	if( ( ((size_t)(*X_b)) < ((size_t)((X+bs-1)/bs))) || (RSB_BLK_ADD_OVERFLOW(*X_b,1)) )
187 	{
188 		/* overflow. should print some message. */
189 		errval = RSB_ERR_LIMITS;goto err;
190 	}
191 
192 	p_x = rsb__malloc(sizeof(rsb_coo_idx_t)*(*X_b+1));
193 	if(! p_x) goto err;
194 	/* note: should use some perrno some day */
195 
196 	/* note the last block size : it is the same, regardless congruences */
197 	{
198 for(i=0;i+15<*X_b;i+=16){
199 p_x[i+0 ] = (i+0 )*bs;
200 	p_x[i+1 ] = (i+1 )*bs;
201 	p_x[i+2 ] = (i+2 )*bs;
202 	p_x[i+3 ] = (i+3 )*bs;
203 	p_x[i+4 ] = (i+4 )*bs;
204 	p_x[i+5 ] = (i+5 )*bs;
205 	p_x[i+6 ] = (i+6 )*bs;
206 	p_x[i+7 ] = (i+7 )*bs;
207 	p_x[i+8 ] = (i+8 )*bs;
208 	p_x[i+9 ] = (i+9 )*bs;
209 	p_x[i+10 ] = (i+10 )*bs;
210 	p_x[i+11 ] = (i+11 )*bs;
211 	p_x[i+12 ] = (i+12 )*bs;
212 	p_x[i+13 ] = (i+13 )*bs;
213 	p_x[i+14 ] = (i+14 )*bs;
214 	p_x[i+15 ] = (i+15 )*bs;
215 	}
216 for(     ;i<*X_b;++i){ p_x[i+0 ] = (i+0 )*bs;
217 	 }
218 }
219 
220 
221 	/* FIXME : this point should be remarked and documented way better ! */
222 	if(flags&(RSB_FLAG_WANT_BCSS_STORAGE|RSB_FLAG_WANT_FIXED_BLOCKING_VBR))
223 		p_x[*X_b] = *X_b*bs;	/* the last element of p_x is the index of the last matrix row/column    + 1  */
224 	else
225 		p_x[*X_b] = X;	/* the last element of p_x is the index of the last matrix row/column    + 1  */
226 
227 	return p_x;
228 err:
229 	RSB_CONDITIONAL_FREE(p_x);
230 	rsb__do_perror(NULL,errval);
231 	return NULL;
232 }
233 
rsb__vector_diff(void * c,const void * a,const void * b,rsb_type_t type,size_t n)234 rsb_err_t rsb__vector_diff(void * c, const void * a, const void * b, rsb_type_t type, size_t n){
235 	/*!
236 	 * c <- a-b
237          *
238 	 * \param array	an array pointer
239 	 * \param type	a valid type code
240 	 * \param n	the input array length
241 	 * \note see daxpy,dcopy in BLAS
242 	 *
243 	 * \return \rsberrcodemsg
244 	 * */
245 	size_t i;
246 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
247 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
248 	{
249 		const double*ta = a,*tb = b;double *tc = c;
250 		{
251 for(i=0;i+15<n;i+=16){
252 		tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
253 			tc[i+1 ] = ta[i+1 ]-tb[i+1 ];
254 			tc[i+2 ] = ta[i+2 ]-tb[i+2 ];
255 			tc[i+3 ] = ta[i+3 ]-tb[i+3 ];
256 			tc[i+4 ] = ta[i+4 ]-tb[i+4 ];
257 			tc[i+5 ] = ta[i+5 ]-tb[i+5 ];
258 			tc[i+6 ] = ta[i+6 ]-tb[i+6 ];
259 			tc[i+7 ] = ta[i+7 ]-tb[i+7 ];
260 			tc[i+8 ] = ta[i+8 ]-tb[i+8 ];
261 			tc[i+9 ] = ta[i+9 ]-tb[i+9 ];
262 			tc[i+10 ] = ta[i+10 ]-tb[i+10 ];
263 			tc[i+11 ] = ta[i+11 ]-tb[i+11 ];
264 			tc[i+12 ] = ta[i+12 ]-tb[i+12 ];
265 			tc[i+13 ] = ta[i+13 ]-tb[i+13 ];
266 			tc[i+14 ] = ta[i+14 ]-tb[i+14 ];
267 			tc[i+15 ] = ta[i+15 ]-tb[i+15 ];
268 	}
269 for(     ;i<n;++i){ 		tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
270 	 }
271 }
272 ;
273 	}
274 	else
275 #endif
276 #ifdef RSB_NUMERICAL_TYPE_FLOAT
277 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
278 	{
279 		const float*ta = a,*tb = b;float *tc = c;
280 		{
281 for(i=0;i+15<n;i+=16){
282 		tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
283 			tc[i+1 ] = ta[i+1 ]-tb[i+1 ];
284 			tc[i+2 ] = ta[i+2 ]-tb[i+2 ];
285 			tc[i+3 ] = ta[i+3 ]-tb[i+3 ];
286 			tc[i+4 ] = ta[i+4 ]-tb[i+4 ];
287 			tc[i+5 ] = ta[i+5 ]-tb[i+5 ];
288 			tc[i+6 ] = ta[i+6 ]-tb[i+6 ];
289 			tc[i+7 ] = ta[i+7 ]-tb[i+7 ];
290 			tc[i+8 ] = ta[i+8 ]-tb[i+8 ];
291 			tc[i+9 ] = ta[i+9 ]-tb[i+9 ];
292 			tc[i+10 ] = ta[i+10 ]-tb[i+10 ];
293 			tc[i+11 ] = ta[i+11 ]-tb[i+11 ];
294 			tc[i+12 ] = ta[i+12 ]-tb[i+12 ];
295 			tc[i+13 ] = ta[i+13 ]-tb[i+13 ];
296 			tc[i+14 ] = ta[i+14 ]-tb[i+14 ];
297 			tc[i+15 ] = ta[i+15 ]-tb[i+15 ];
298 	}
299 for(     ;i<n;++i){ 		tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
300 	 }
301 }
302 ;
303 	}
304 	else
305 #endif
306 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
307 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
308 	{
309 		const float complex*ta = a,*tb = b;float complex *tc = c;
310 		{
311 for(i=0;i+15<n;i+=16){
312 		tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
313 			tc[i+1 ] = ta[i+1 ]-tb[i+1 ];
314 			tc[i+2 ] = ta[i+2 ]-tb[i+2 ];
315 			tc[i+3 ] = ta[i+3 ]-tb[i+3 ];
316 			tc[i+4 ] = ta[i+4 ]-tb[i+4 ];
317 			tc[i+5 ] = ta[i+5 ]-tb[i+5 ];
318 			tc[i+6 ] = ta[i+6 ]-tb[i+6 ];
319 			tc[i+7 ] = ta[i+7 ]-tb[i+7 ];
320 			tc[i+8 ] = ta[i+8 ]-tb[i+8 ];
321 			tc[i+9 ] = ta[i+9 ]-tb[i+9 ];
322 			tc[i+10 ] = ta[i+10 ]-tb[i+10 ];
323 			tc[i+11 ] = ta[i+11 ]-tb[i+11 ];
324 			tc[i+12 ] = ta[i+12 ]-tb[i+12 ];
325 			tc[i+13 ] = ta[i+13 ]-tb[i+13 ];
326 			tc[i+14 ] = ta[i+14 ]-tb[i+14 ];
327 			tc[i+15 ] = ta[i+15 ]-tb[i+15 ];
328 	}
329 for(     ;i<n;++i){ 		tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
330 	 }
331 }
332 ;
333 	}
334 	else
335 #endif
336 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
337 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
338 	{
339 		const double complex*ta = a,*tb = b;double complex *tc = c;
340 		{
341 for(i=0;i+15<n;i+=16){
342 		tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
343 			tc[i+1 ] = ta[i+1 ]-tb[i+1 ];
344 			tc[i+2 ] = ta[i+2 ]-tb[i+2 ];
345 			tc[i+3 ] = ta[i+3 ]-tb[i+3 ];
346 			tc[i+4 ] = ta[i+4 ]-tb[i+4 ];
347 			tc[i+5 ] = ta[i+5 ]-tb[i+5 ];
348 			tc[i+6 ] = ta[i+6 ]-tb[i+6 ];
349 			tc[i+7 ] = ta[i+7 ]-tb[i+7 ];
350 			tc[i+8 ] = ta[i+8 ]-tb[i+8 ];
351 			tc[i+9 ] = ta[i+9 ]-tb[i+9 ];
352 			tc[i+10 ] = ta[i+10 ]-tb[i+10 ];
353 			tc[i+11 ] = ta[i+11 ]-tb[i+11 ];
354 			tc[i+12 ] = ta[i+12 ]-tb[i+12 ];
355 			tc[i+13 ] = ta[i+13 ]-tb[i+13 ];
356 			tc[i+14 ] = ta[i+14 ]-tb[i+14 ];
357 			tc[i+15 ] = ta[i+15 ]-tb[i+15 ];
358 	}
359 for(     ;i<n;++i){ 		tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
360 	 }
361 }
362 ;
363 	}
364 	else
365 #endif
366 	return RSB_ERR_UNSUPPORTED_TYPE	;
367 	return RSB_ERR_NO_ERROR;
368 }
369 
rsb_vector_norm_square(void * c,const void * a,rsb_type_t type,size_t n)370 static rsb_err_t rsb_vector_norm_square(void * c, const void * a, rsb_type_t type, size_t n)
371 {
372 	/*!
373 	 * c <- a^T*a
374          *
375 	 * \param a	an array pointer
376 	 * \param type	a valid type code
377 	 * \param n	the input array length
378 	 * \note see ddot in BLAS
379 	 *
380 	 * \return \rsberrcodemsg
381 	 * */
382 	size_t i;
383 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
384 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
385 	{
386 		const double*ta = a;double *tc = c;
387 		tc[0] = ((double)(0));
388 		{
389 for(i=0;i+15<n;i+=16){
390 		tc[0]+=ta[i+0 ]*ta[i+0 ];
391 			tc[0]+=ta[i+1 ]*ta[i+1 ];
392 			tc[0]+=ta[i+2 ]*ta[i+2 ];
393 			tc[0]+=ta[i+3 ]*ta[i+3 ];
394 			tc[0]+=ta[i+4 ]*ta[i+4 ];
395 			tc[0]+=ta[i+5 ]*ta[i+5 ];
396 			tc[0]+=ta[i+6 ]*ta[i+6 ];
397 			tc[0]+=ta[i+7 ]*ta[i+7 ];
398 			tc[0]+=ta[i+8 ]*ta[i+8 ];
399 			tc[0]+=ta[i+9 ]*ta[i+9 ];
400 			tc[0]+=ta[i+10 ]*ta[i+10 ];
401 			tc[0]+=ta[i+11 ]*ta[i+11 ];
402 			tc[0]+=ta[i+12 ]*ta[i+12 ];
403 			tc[0]+=ta[i+13 ]*ta[i+13 ];
404 			tc[0]+=ta[i+14 ]*ta[i+14 ];
405 			tc[0]+=ta[i+15 ]*ta[i+15 ];
406 	}
407 for(     ;i<n;++i){ 		tc[0]+=ta[i+0 ]*ta[i+0 ];
408 	 }
409 }
410 ;
411 	}
412 	else
413 #endif
414 #ifdef RSB_NUMERICAL_TYPE_FLOAT
415 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
416 	{
417 		const float*ta = a;float *tc = c;
418 		tc[0] = ((float)(0));
419 		{
420 for(i=0;i+15<n;i+=16){
421 		tc[0]+=ta[i+0 ]*ta[i+0 ];
422 			tc[0]+=ta[i+1 ]*ta[i+1 ];
423 			tc[0]+=ta[i+2 ]*ta[i+2 ];
424 			tc[0]+=ta[i+3 ]*ta[i+3 ];
425 			tc[0]+=ta[i+4 ]*ta[i+4 ];
426 			tc[0]+=ta[i+5 ]*ta[i+5 ];
427 			tc[0]+=ta[i+6 ]*ta[i+6 ];
428 			tc[0]+=ta[i+7 ]*ta[i+7 ];
429 			tc[0]+=ta[i+8 ]*ta[i+8 ];
430 			tc[0]+=ta[i+9 ]*ta[i+9 ];
431 			tc[0]+=ta[i+10 ]*ta[i+10 ];
432 			tc[0]+=ta[i+11 ]*ta[i+11 ];
433 			tc[0]+=ta[i+12 ]*ta[i+12 ];
434 			tc[0]+=ta[i+13 ]*ta[i+13 ];
435 			tc[0]+=ta[i+14 ]*ta[i+14 ];
436 			tc[0]+=ta[i+15 ]*ta[i+15 ];
437 	}
438 for(     ;i<n;++i){ 		tc[0]+=ta[i+0 ]*ta[i+0 ];
439 	 }
440 }
441 ;
442 	}
443 	else
444 #endif
445 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
446 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
447 	{
448 		const float complex*ta = a;float complex *tc = c;
449 		tc[0] = ((float complex)(0));
450 		{
451 for(i=0;i+15<n;i+=16){
452 		tc[0]+=ta[i+0 ]*ta[i+0 ];
453 			tc[0]+=ta[i+1 ]*ta[i+1 ];
454 			tc[0]+=ta[i+2 ]*ta[i+2 ];
455 			tc[0]+=ta[i+3 ]*ta[i+3 ];
456 			tc[0]+=ta[i+4 ]*ta[i+4 ];
457 			tc[0]+=ta[i+5 ]*ta[i+5 ];
458 			tc[0]+=ta[i+6 ]*ta[i+6 ];
459 			tc[0]+=ta[i+7 ]*ta[i+7 ];
460 			tc[0]+=ta[i+8 ]*ta[i+8 ];
461 			tc[0]+=ta[i+9 ]*ta[i+9 ];
462 			tc[0]+=ta[i+10 ]*ta[i+10 ];
463 			tc[0]+=ta[i+11 ]*ta[i+11 ];
464 			tc[0]+=ta[i+12 ]*ta[i+12 ];
465 			tc[0]+=ta[i+13 ]*ta[i+13 ];
466 			tc[0]+=ta[i+14 ]*ta[i+14 ];
467 			tc[0]+=ta[i+15 ]*ta[i+15 ];
468 	}
469 for(     ;i<n;++i){ 		tc[0]+=ta[i+0 ]*ta[i+0 ];
470 	 }
471 }
472 ;
473 	}
474 	else
475 #endif
476 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
477 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
478 	{
479 		const double complex*ta = a;double complex *tc = c;
480 		tc[0] = ((double complex)(0));
481 		{
482 for(i=0;i+15<n;i+=16){
483 		tc[0]+=ta[i+0 ]*ta[i+0 ];
484 			tc[0]+=ta[i+1 ]*ta[i+1 ];
485 			tc[0]+=ta[i+2 ]*ta[i+2 ];
486 			tc[0]+=ta[i+3 ]*ta[i+3 ];
487 			tc[0]+=ta[i+4 ]*ta[i+4 ];
488 			tc[0]+=ta[i+5 ]*ta[i+5 ];
489 			tc[0]+=ta[i+6 ]*ta[i+6 ];
490 			tc[0]+=ta[i+7 ]*ta[i+7 ];
491 			tc[0]+=ta[i+8 ]*ta[i+8 ];
492 			tc[0]+=ta[i+9 ]*ta[i+9 ];
493 			tc[0]+=ta[i+10 ]*ta[i+10 ];
494 			tc[0]+=ta[i+11 ]*ta[i+11 ];
495 			tc[0]+=ta[i+12 ]*ta[i+12 ];
496 			tc[0]+=ta[i+13 ]*ta[i+13 ];
497 			tc[0]+=ta[i+14 ]*ta[i+14 ];
498 			tc[0]+=ta[i+15 ]*ta[i+15 ];
499 	}
500 for(     ;i<n;++i){ 		tc[0]+=ta[i+0 ]*ta[i+0 ];
501 	 }
502 }
503 ;
504 	}
505 	else
506 #endif
507 	return RSB_ERR_UNSUPPORTED_TYPE	;
508 	return RSB_ERR_NO_ERROR;
509 }
510 
rsb_vector_norm(void * c,const void * a,rsb_type_t type,size_t n)511 static rsb_err_t rsb_vector_norm(void * c, const void * a, rsb_type_t type, size_t n)
512 {
513 	/*!
514 	 * c <- sqrt(a^T*a)
515          *
516 	 * \param array	an array pointer
517 	 * \param type	a valid type code
518 	 * \param n	the input array length
519 	 * \note see ddot in BLAS
520 	 *
521 	 * \return \rsberrcodemsg
522 	 * */
523 	rsb_err_t errval;
524 	if(!c)
525 		return RSB_ERR_BADARGS;
526 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
527 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
528 	{
529 		double*cp = (double*)c;
530 		errval = rsb_vector_norm_square(cp,a,type,n);
531 		*cp = sqrt(*cp);
532 	}
533 	else
534 #endif
535 #ifdef RSB_NUMERICAL_TYPE_FLOAT
536 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
537 	{
538 		float*cp = (float*)c;
539 		errval = rsb_vector_norm_square(cp,a,type,n);
540 		*cp = sqrtf(*cp);
541 	}
542 	else
543 #endif
544 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
545 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
546 	{
547 		float complex*cp = (float complex*)c;
548 		errval = rsb_vector_norm_square(cp,a,type,n);
549 		*cp = csqrtf(*cp);
550 	}
551 	else
552 #endif
553 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
554 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
555 	{
556 		double complex*cp = (double complex*)c;
557 		errval = rsb_vector_norm_square(cp,a,type,n);
558 		*cp = csqrt(*cp);
559 	}
560 	else
561 #endif
562 		errval = RSB_ERR_UNSUPPORTED_TYPE;
563 	RSB_DO_ERR_RETURN(errval)
564 }
565 
rsb_vector_norm_square_strided(void * c,const void * a,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)566 static rsb_err_t rsb_vector_norm_square_strided(void * c, const void * a, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
567 {
568 	/*!
569 	 * c <- a^T*a
570          *
571 	 * \param array	an array pointer
572 	 * \param type	a valid type code
573 	 * \param n	the input array length
574 	 * \note see ddot in BLAS
575 	 *
576 	 * \return \rsberrcodemsg
577 	 * */
578 	size_t i;
579 	if(inc==1)
580 		return rsb_vector_norm_square(c,a,type,n);
581 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
582 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
583 	{
584 		const double*ta = a;double *tc = c;
585 		tc[0] = ((double)(0));
586 		{
587 for(i=0;i+15<n;i+=16){
588 		tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
589 			tc[0]+=ta[(i+1 )*inc]*ta[(i+1 )*inc];
590 			tc[0]+=ta[(i+2 )*inc]*ta[(i+2 )*inc];
591 			tc[0]+=ta[(i+3 )*inc]*ta[(i+3 )*inc];
592 			tc[0]+=ta[(i+4 )*inc]*ta[(i+4 )*inc];
593 			tc[0]+=ta[(i+5 )*inc]*ta[(i+5 )*inc];
594 			tc[0]+=ta[(i+6 )*inc]*ta[(i+6 )*inc];
595 			tc[0]+=ta[(i+7 )*inc]*ta[(i+7 )*inc];
596 			tc[0]+=ta[(i+8 )*inc]*ta[(i+8 )*inc];
597 			tc[0]+=ta[(i+9 )*inc]*ta[(i+9 )*inc];
598 			tc[0]+=ta[(i+10 )*inc]*ta[(i+10 )*inc];
599 			tc[0]+=ta[(i+11 )*inc]*ta[(i+11 )*inc];
600 			tc[0]+=ta[(i+12 )*inc]*ta[(i+12 )*inc];
601 			tc[0]+=ta[(i+13 )*inc]*ta[(i+13 )*inc];
602 			tc[0]+=ta[(i+14 )*inc]*ta[(i+14 )*inc];
603 			tc[0]+=ta[(i+15 )*inc]*ta[(i+15 )*inc];
604 	}
605 for(     ;i<n;++i){ 		tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
606 	 }
607 }
608 ;
609 	}
610 	else
611 #endif
612 #ifdef RSB_NUMERICAL_TYPE_FLOAT
613 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
614 	{
615 		const float*ta = a;float *tc = c;
616 		tc[0] = ((float)(0));
617 		{
618 for(i=0;i+15<n;i+=16){
619 		tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
620 			tc[0]+=ta[(i+1 )*inc]*ta[(i+1 )*inc];
621 			tc[0]+=ta[(i+2 )*inc]*ta[(i+2 )*inc];
622 			tc[0]+=ta[(i+3 )*inc]*ta[(i+3 )*inc];
623 			tc[0]+=ta[(i+4 )*inc]*ta[(i+4 )*inc];
624 			tc[0]+=ta[(i+5 )*inc]*ta[(i+5 )*inc];
625 			tc[0]+=ta[(i+6 )*inc]*ta[(i+6 )*inc];
626 			tc[0]+=ta[(i+7 )*inc]*ta[(i+7 )*inc];
627 			tc[0]+=ta[(i+8 )*inc]*ta[(i+8 )*inc];
628 			tc[0]+=ta[(i+9 )*inc]*ta[(i+9 )*inc];
629 			tc[0]+=ta[(i+10 )*inc]*ta[(i+10 )*inc];
630 			tc[0]+=ta[(i+11 )*inc]*ta[(i+11 )*inc];
631 			tc[0]+=ta[(i+12 )*inc]*ta[(i+12 )*inc];
632 			tc[0]+=ta[(i+13 )*inc]*ta[(i+13 )*inc];
633 			tc[0]+=ta[(i+14 )*inc]*ta[(i+14 )*inc];
634 			tc[0]+=ta[(i+15 )*inc]*ta[(i+15 )*inc];
635 	}
636 for(     ;i<n;++i){ 		tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
637 	 }
638 }
639 ;
640 	}
641 	else
642 #endif
643 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
644 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
645 	{
646 		const float complex*ta = a;float complex *tc = c;
647 		tc[0] = ((float complex)(0));
648 		{
649 for(i=0;i+15<n;i+=16){
650 		tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
651 			tc[0]+=ta[(i+1 )*inc]*ta[(i+1 )*inc];
652 			tc[0]+=ta[(i+2 )*inc]*ta[(i+2 )*inc];
653 			tc[0]+=ta[(i+3 )*inc]*ta[(i+3 )*inc];
654 			tc[0]+=ta[(i+4 )*inc]*ta[(i+4 )*inc];
655 			tc[0]+=ta[(i+5 )*inc]*ta[(i+5 )*inc];
656 			tc[0]+=ta[(i+6 )*inc]*ta[(i+6 )*inc];
657 			tc[0]+=ta[(i+7 )*inc]*ta[(i+7 )*inc];
658 			tc[0]+=ta[(i+8 )*inc]*ta[(i+8 )*inc];
659 			tc[0]+=ta[(i+9 )*inc]*ta[(i+9 )*inc];
660 			tc[0]+=ta[(i+10 )*inc]*ta[(i+10 )*inc];
661 			tc[0]+=ta[(i+11 )*inc]*ta[(i+11 )*inc];
662 			tc[0]+=ta[(i+12 )*inc]*ta[(i+12 )*inc];
663 			tc[0]+=ta[(i+13 )*inc]*ta[(i+13 )*inc];
664 			tc[0]+=ta[(i+14 )*inc]*ta[(i+14 )*inc];
665 			tc[0]+=ta[(i+15 )*inc]*ta[(i+15 )*inc];
666 	}
667 for(     ;i<n;++i){ 		tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
668 	 }
669 }
670 ;
671 	}
672 	else
673 #endif
674 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
675 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
676 	{
677 		const double complex*ta = a;double complex *tc = c;
678 		tc[0] = ((double complex)(0));
679 		{
680 for(i=0;i+15<n;i+=16){
681 		tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
682 			tc[0]+=ta[(i+1 )*inc]*ta[(i+1 )*inc];
683 			tc[0]+=ta[(i+2 )*inc]*ta[(i+2 )*inc];
684 			tc[0]+=ta[(i+3 )*inc]*ta[(i+3 )*inc];
685 			tc[0]+=ta[(i+4 )*inc]*ta[(i+4 )*inc];
686 			tc[0]+=ta[(i+5 )*inc]*ta[(i+5 )*inc];
687 			tc[0]+=ta[(i+6 )*inc]*ta[(i+6 )*inc];
688 			tc[0]+=ta[(i+7 )*inc]*ta[(i+7 )*inc];
689 			tc[0]+=ta[(i+8 )*inc]*ta[(i+8 )*inc];
690 			tc[0]+=ta[(i+9 )*inc]*ta[(i+9 )*inc];
691 			tc[0]+=ta[(i+10 )*inc]*ta[(i+10 )*inc];
692 			tc[0]+=ta[(i+11 )*inc]*ta[(i+11 )*inc];
693 			tc[0]+=ta[(i+12 )*inc]*ta[(i+12 )*inc];
694 			tc[0]+=ta[(i+13 )*inc]*ta[(i+13 )*inc];
695 			tc[0]+=ta[(i+14 )*inc]*ta[(i+14 )*inc];
696 			tc[0]+=ta[(i+15 )*inc]*ta[(i+15 )*inc];
697 	}
698 for(     ;i<n;++i){ 		tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
699 	 }
700 }
701 ;
702 	}
703 	else
704 #endif
705 	return RSB_ERR_UNSUPPORTED_TYPE	;
706 	return RSB_ERR_NO_ERROR;
707 }
708 
rsb__vector_norm_strided(void * c,const void * a,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)709 rsb_err_t rsb__vector_norm_strided(void * c, const void * a, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
710 {
711 	/*!
712 	 * c <- sqrt(a^T*a)
713          *
714 	 * \param array	an array pointer
715 	 * \param type	a valid type code
716 	 * \param n	the input array length
717 	 * \note see ddot in BLAS
718 	 *
719 	 * \return \rsberrcodemsg
720 	 * */
721 	rsb_err_t errval;
722 	if(!c)
723 		return RSB_ERR_BADARGS;
724 	if(inc==1)
725 		return rsb_vector_norm(c,a,type,n);
726 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
727 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
728 	{
729 		double*cp = (double*)c;
730 		errval = rsb_vector_norm_square_strided(cp,a,type,n,inc);
731 		*cp = sqrt(*cp);
732 	}
733 	else
734 #endif
735 #ifdef RSB_NUMERICAL_TYPE_FLOAT
736 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
737 	{
738 		float*cp = (float*)c;
739 		errval = rsb_vector_norm_square_strided(cp,a,type,n,inc);
740 		*cp = sqrtf(*cp);
741 	}
742 	else
743 #endif
744 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
745 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
746 	{
747 		float complex*cp = (float complex*)c;
748 		errval = rsb_vector_norm_square_strided(cp,a,type,n,inc);
749 		*cp = csqrtf(*cp);
750 	}
751 	else
752 #endif
753 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
754 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
755 	{
756 		double complex*cp = (double complex*)c;
757 		errval = rsb_vector_norm_square_strided(cp,a,type,n,inc);
758 		*cp = csqrt(*cp);
759 	}
760 	else
761 #endif
762 		errval = RSB_ERR_UNSUPPORTED_TYPE;
763 	RSB_DO_ERR_RETURN(errval)
764 }
765 
rsb__util_vector_sum_strided(void * c,const void * a,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)766 rsb_err_t rsb__util_vector_sum_strided(void * c, const void * a, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
767 {
768 	/*!
769 	 * c <- sum(a)
770          *
771 	 * \param array	an array pointer
772 	 * \param type	a valid type code
773 	 * \param n	the input array length
774 	 * \note see ddot in BLAS
775 	 *
776 	 * \return \rsberrcodemsg
777 	 * */
778 	size_t i;
779 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
780 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
781 	{
782 		register double acc = ((double)(0)); const double*ta = a; double*tc = c;
783 	{
784 for(i=0;i+15<n;i+=16){
785 		acc+=ta[(i+0 )*inc];
786 			acc+=ta[(i+1 )*inc];
787 			acc+=ta[(i+2 )*inc];
788 			acc+=ta[(i+3 )*inc];
789 			acc+=ta[(i+4 )*inc];
790 			acc+=ta[(i+5 )*inc];
791 			acc+=ta[(i+6 )*inc];
792 			acc+=ta[(i+7 )*inc];
793 			acc+=ta[(i+8 )*inc];
794 			acc+=ta[(i+9 )*inc];
795 			acc+=ta[(i+10 )*inc];
796 			acc+=ta[(i+11 )*inc];
797 			acc+=ta[(i+12 )*inc];
798 			acc+=ta[(i+13 )*inc];
799 			acc+=ta[(i+14 )*inc];
800 			acc+=ta[(i+15 )*inc];
801 	}
802 for(     ;i<n;++i){ 		acc+=ta[(i+0 )*inc];
803 	 }
804 }
805 ;
806 		tc[0] = acc;
807 	}
808 	else
809 #endif
810 #ifdef RSB_NUMERICAL_TYPE_FLOAT
811 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
812 	{
813 		register float acc = ((float)(0)); const float*ta = a; float*tc = c;
814 	{
815 for(i=0;i+15<n;i+=16){
816 		acc+=ta[(i+0 )*inc];
817 			acc+=ta[(i+1 )*inc];
818 			acc+=ta[(i+2 )*inc];
819 			acc+=ta[(i+3 )*inc];
820 			acc+=ta[(i+4 )*inc];
821 			acc+=ta[(i+5 )*inc];
822 			acc+=ta[(i+6 )*inc];
823 			acc+=ta[(i+7 )*inc];
824 			acc+=ta[(i+8 )*inc];
825 			acc+=ta[(i+9 )*inc];
826 			acc+=ta[(i+10 )*inc];
827 			acc+=ta[(i+11 )*inc];
828 			acc+=ta[(i+12 )*inc];
829 			acc+=ta[(i+13 )*inc];
830 			acc+=ta[(i+14 )*inc];
831 			acc+=ta[(i+15 )*inc];
832 	}
833 for(     ;i<n;++i){ 		acc+=ta[(i+0 )*inc];
834 	 }
835 }
836 ;
837 		tc[0] = acc;
838 	}
839 	else
840 #endif
841 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
842 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
843 	{
844 		register float complex acc = ((float complex)(0)); const float complex*ta = a; float complex*tc = c;
845 	{
846 for(i=0;i+15<n;i+=16){
847 		acc+=ta[(i+0 )*inc];
848 			acc+=ta[(i+1 )*inc];
849 			acc+=ta[(i+2 )*inc];
850 			acc+=ta[(i+3 )*inc];
851 			acc+=ta[(i+4 )*inc];
852 			acc+=ta[(i+5 )*inc];
853 			acc+=ta[(i+6 )*inc];
854 			acc+=ta[(i+7 )*inc];
855 			acc+=ta[(i+8 )*inc];
856 			acc+=ta[(i+9 )*inc];
857 			acc+=ta[(i+10 )*inc];
858 			acc+=ta[(i+11 )*inc];
859 			acc+=ta[(i+12 )*inc];
860 			acc+=ta[(i+13 )*inc];
861 			acc+=ta[(i+14 )*inc];
862 			acc+=ta[(i+15 )*inc];
863 	}
864 for(     ;i<n;++i){ 		acc+=ta[(i+0 )*inc];
865 	 }
866 }
867 ;
868 		tc[0] = acc;
869 	}
870 	else
871 #endif
872 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
873 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
874 	{
875 		register double complex acc = ((double complex)(0)); const double complex*ta = a; double complex*tc = c;
876 	{
877 for(i=0;i+15<n;i+=16){
878 		acc+=ta[(i+0 )*inc];
879 			acc+=ta[(i+1 )*inc];
880 			acc+=ta[(i+2 )*inc];
881 			acc+=ta[(i+3 )*inc];
882 			acc+=ta[(i+4 )*inc];
883 			acc+=ta[(i+5 )*inc];
884 			acc+=ta[(i+6 )*inc];
885 			acc+=ta[(i+7 )*inc];
886 			acc+=ta[(i+8 )*inc];
887 			acc+=ta[(i+9 )*inc];
888 			acc+=ta[(i+10 )*inc];
889 			acc+=ta[(i+11 )*inc];
890 			acc+=ta[(i+12 )*inc];
891 			acc+=ta[(i+13 )*inc];
892 			acc+=ta[(i+14 )*inc];
893 			acc+=ta[(i+15 )*inc];
894 	}
895 for(     ;i<n;++i){ 		acc+=ta[(i+0 )*inc];
896 	 }
897 }
898 ;
899 		tc[0] = acc;
900 	}
901 	else
902 #endif
903 	return RSB_ERR_UNSUPPORTED_TYPE	;
904 	return RSB_ERR_NO_ERROR;
905 }
906 
rsb__util_vector_sum(void * c,const void * a,rsb_type_t type,size_t n)907 rsb_err_t rsb__util_vector_sum(void * c, const void * a, rsb_type_t type, size_t n)
908 {
909 	/*!
910 	 * c <- sum(a)
911          *
912 	 * \param array	an array pointer
913 	 * \param type	a valid type code
914 	 * \param n	the input array length
915 	 * \note see ddot in BLAS
916 	 *
917 	 * \return \rsberrcodemsg
918 	 * */
919 	size_t i;
920 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
921 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
922 	{
923 	const double*ta = a; double*tc = c; tc[0] = ((double)(0));
924 	{
925 for(i=0;i+15<n;i+=16){
926 	tc[0]+=ta[i+0 ];
927 		tc[0]+=ta[i+1 ];
928 		tc[0]+=ta[i+2 ];
929 		tc[0]+=ta[i+3 ];
930 		tc[0]+=ta[i+4 ];
931 		tc[0]+=ta[i+5 ];
932 		tc[0]+=ta[i+6 ];
933 		tc[0]+=ta[i+7 ];
934 		tc[0]+=ta[i+8 ];
935 		tc[0]+=ta[i+9 ];
936 		tc[0]+=ta[i+10 ];
937 		tc[0]+=ta[i+11 ];
938 		tc[0]+=ta[i+12 ];
939 		tc[0]+=ta[i+13 ];
940 		tc[0]+=ta[i+14 ];
941 		tc[0]+=ta[i+15 ];
942 	}
943 for(     ;i<n;++i){ 	tc[0]+=ta[i+0 ];
944 	 }
945 }
946 ;
947 	}
948 	else
949 #endif
950 #ifdef RSB_NUMERICAL_TYPE_FLOAT
951 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
952 	{
953 	const float*ta = a; float*tc = c; tc[0] = ((float)(0));
954 	{
955 for(i=0;i+15<n;i+=16){
956 	tc[0]+=ta[i+0 ];
957 		tc[0]+=ta[i+1 ];
958 		tc[0]+=ta[i+2 ];
959 		tc[0]+=ta[i+3 ];
960 		tc[0]+=ta[i+4 ];
961 		tc[0]+=ta[i+5 ];
962 		tc[0]+=ta[i+6 ];
963 		tc[0]+=ta[i+7 ];
964 		tc[0]+=ta[i+8 ];
965 		tc[0]+=ta[i+9 ];
966 		tc[0]+=ta[i+10 ];
967 		tc[0]+=ta[i+11 ];
968 		tc[0]+=ta[i+12 ];
969 		tc[0]+=ta[i+13 ];
970 		tc[0]+=ta[i+14 ];
971 		tc[0]+=ta[i+15 ];
972 	}
973 for(     ;i<n;++i){ 	tc[0]+=ta[i+0 ];
974 	 }
975 }
976 ;
977 	}
978 	else
979 #endif
980 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
981 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
982 	{
983 	const float complex*ta = a; float complex*tc = c; tc[0] = ((float complex)(0));
984 	{
985 for(i=0;i+15<n;i+=16){
986 	tc[0]+=ta[i+0 ];
987 		tc[0]+=ta[i+1 ];
988 		tc[0]+=ta[i+2 ];
989 		tc[0]+=ta[i+3 ];
990 		tc[0]+=ta[i+4 ];
991 		tc[0]+=ta[i+5 ];
992 		tc[0]+=ta[i+6 ];
993 		tc[0]+=ta[i+7 ];
994 		tc[0]+=ta[i+8 ];
995 		tc[0]+=ta[i+9 ];
996 		tc[0]+=ta[i+10 ];
997 		tc[0]+=ta[i+11 ];
998 		tc[0]+=ta[i+12 ];
999 		tc[0]+=ta[i+13 ];
1000 		tc[0]+=ta[i+14 ];
1001 		tc[0]+=ta[i+15 ];
1002 	}
1003 for(     ;i<n;++i){ 	tc[0]+=ta[i+0 ];
1004 	 }
1005 }
1006 ;
1007 	}
1008 	else
1009 #endif
1010 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1011 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
1012 	{
1013 	const double complex*ta = a; double complex*tc = c; tc[0] = ((double complex)(0));
1014 	{
1015 for(i=0;i+15<n;i+=16){
1016 	tc[0]+=ta[i+0 ];
1017 		tc[0]+=ta[i+1 ];
1018 		tc[0]+=ta[i+2 ];
1019 		tc[0]+=ta[i+3 ];
1020 		tc[0]+=ta[i+4 ];
1021 		tc[0]+=ta[i+5 ];
1022 		tc[0]+=ta[i+6 ];
1023 		tc[0]+=ta[i+7 ];
1024 		tc[0]+=ta[i+8 ];
1025 		tc[0]+=ta[i+9 ];
1026 		tc[0]+=ta[i+10 ];
1027 		tc[0]+=ta[i+11 ];
1028 		tc[0]+=ta[i+12 ];
1029 		tc[0]+=ta[i+13 ];
1030 		tc[0]+=ta[i+14 ];
1031 		tc[0]+=ta[i+15 ];
1032 	}
1033 for(     ;i<n;++i){ 	tc[0]+=ta[i+0 ];
1034 	 }
1035 }
1036 ;
1037 	}
1038 	else
1039 #endif
1040 	return RSB_ERR_UNSUPPORTED_TYPE	;
1041 	return RSB_ERR_NO_ERROR;
1042 }
1043 
rsb__vector_mult_sum(const void * a,const void * b,void * c,rsb_type_t type,size_t n,const int inca,const int incb)1044 static rsb_err_t rsb__vector_mult_sum(const void * a, const void * b, void * c, rsb_type_t type, size_t n, const int inca, const int incb)
1045 {
1046 	/*!
1047 	 * c <- sum(a*b)
1048 	 * It is allowed to give c == a or c == b or a==b
1049          *
1050 	 * \param array	an array pointer
1051 	 * \param type	a valid type code
1052 	 * \param n	the input array length
1053 	 * \note see ddot in BLAS
1054 	 *
1055 	 * \return \rsberrcodemsg
1056 	 *
1057 	 * p.s.: this routine is, numerically speaking, a crime!
1058 	 *
1059 	 * */
1060 	size_t i;
1061 	if(a==b && inca==incb)
1062 		return rsb_vector_norm_square_strided(c,a,type,n,inca);
1063 	if(inca == 1 && incb == 1)
1064 	{
1065 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1066 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
1067 	{
1068 		const double*tb = b; const double*ta = a; double*tc = c,cacc = ((double)(0));
1069 		{
1070 for(i=0;i+15<n;i+=16){
1071 		cacc+=ta[i+0 ]*tb[i+0 ];
1072 			cacc+=ta[i+1 ]*tb[i+1 ];
1073 			cacc+=ta[i+2 ]*tb[i+2 ];
1074 			cacc+=ta[i+3 ]*tb[i+3 ];
1075 			cacc+=ta[i+4 ]*tb[i+4 ];
1076 			cacc+=ta[i+5 ]*tb[i+5 ];
1077 			cacc+=ta[i+6 ]*tb[i+6 ];
1078 			cacc+=ta[i+7 ]*tb[i+7 ];
1079 			cacc+=ta[i+8 ]*tb[i+8 ];
1080 			cacc+=ta[i+9 ]*tb[i+9 ];
1081 			cacc+=ta[i+10 ]*tb[i+10 ];
1082 			cacc+=ta[i+11 ]*tb[i+11 ];
1083 			cacc+=ta[i+12 ]*tb[i+12 ];
1084 			cacc+=ta[i+13 ]*tb[i+13 ];
1085 			cacc+=ta[i+14 ]*tb[i+14 ];
1086 			cacc+=ta[i+15 ]*tb[i+15 ];
1087 	}
1088 for(     ;i<n;++i){ 		cacc+=ta[i+0 ]*tb[i+0 ];
1089 	 }
1090 }
1091 ;
1092 		*tc = cacc;
1093 	}
1094 	else
1095 #endif
1096 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1097 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
1098 	{
1099 		const float*tb = b; const float*ta = a; float*tc = c,cacc = ((float)(0));
1100 		{
1101 for(i=0;i+15<n;i+=16){
1102 		cacc+=ta[i+0 ]*tb[i+0 ];
1103 			cacc+=ta[i+1 ]*tb[i+1 ];
1104 			cacc+=ta[i+2 ]*tb[i+2 ];
1105 			cacc+=ta[i+3 ]*tb[i+3 ];
1106 			cacc+=ta[i+4 ]*tb[i+4 ];
1107 			cacc+=ta[i+5 ]*tb[i+5 ];
1108 			cacc+=ta[i+6 ]*tb[i+6 ];
1109 			cacc+=ta[i+7 ]*tb[i+7 ];
1110 			cacc+=ta[i+8 ]*tb[i+8 ];
1111 			cacc+=ta[i+9 ]*tb[i+9 ];
1112 			cacc+=ta[i+10 ]*tb[i+10 ];
1113 			cacc+=ta[i+11 ]*tb[i+11 ];
1114 			cacc+=ta[i+12 ]*tb[i+12 ];
1115 			cacc+=ta[i+13 ]*tb[i+13 ];
1116 			cacc+=ta[i+14 ]*tb[i+14 ];
1117 			cacc+=ta[i+15 ]*tb[i+15 ];
1118 	}
1119 for(     ;i<n;++i){ 		cacc+=ta[i+0 ]*tb[i+0 ];
1120 	 }
1121 }
1122 ;
1123 		*tc = cacc;
1124 	}
1125 	else
1126 #endif
1127 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1128 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
1129 	{
1130 		const float complex*tb = b; const float complex*ta = a; float complex*tc = c,cacc = ((float complex)(0));
1131 		{
1132 for(i=0;i+15<n;i+=16){
1133 		cacc+=ta[i+0 ]*tb[i+0 ];
1134 			cacc+=ta[i+1 ]*tb[i+1 ];
1135 			cacc+=ta[i+2 ]*tb[i+2 ];
1136 			cacc+=ta[i+3 ]*tb[i+3 ];
1137 			cacc+=ta[i+4 ]*tb[i+4 ];
1138 			cacc+=ta[i+5 ]*tb[i+5 ];
1139 			cacc+=ta[i+6 ]*tb[i+6 ];
1140 			cacc+=ta[i+7 ]*tb[i+7 ];
1141 			cacc+=ta[i+8 ]*tb[i+8 ];
1142 			cacc+=ta[i+9 ]*tb[i+9 ];
1143 			cacc+=ta[i+10 ]*tb[i+10 ];
1144 			cacc+=ta[i+11 ]*tb[i+11 ];
1145 			cacc+=ta[i+12 ]*tb[i+12 ];
1146 			cacc+=ta[i+13 ]*tb[i+13 ];
1147 			cacc+=ta[i+14 ]*tb[i+14 ];
1148 			cacc+=ta[i+15 ]*tb[i+15 ];
1149 	}
1150 for(     ;i<n;++i){ 		cacc+=ta[i+0 ]*tb[i+0 ];
1151 	 }
1152 }
1153 ;
1154 		*tc = cacc;
1155 	}
1156 	else
1157 #endif
1158 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1159 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
1160 	{
1161 		const double complex*tb = b; const double complex*ta = a; double complex*tc = c,cacc = ((double complex)(0));
1162 		{
1163 for(i=0;i+15<n;i+=16){
1164 		cacc+=ta[i+0 ]*tb[i+0 ];
1165 			cacc+=ta[i+1 ]*tb[i+1 ];
1166 			cacc+=ta[i+2 ]*tb[i+2 ];
1167 			cacc+=ta[i+3 ]*tb[i+3 ];
1168 			cacc+=ta[i+4 ]*tb[i+4 ];
1169 			cacc+=ta[i+5 ]*tb[i+5 ];
1170 			cacc+=ta[i+6 ]*tb[i+6 ];
1171 			cacc+=ta[i+7 ]*tb[i+7 ];
1172 			cacc+=ta[i+8 ]*tb[i+8 ];
1173 			cacc+=ta[i+9 ]*tb[i+9 ];
1174 			cacc+=ta[i+10 ]*tb[i+10 ];
1175 			cacc+=ta[i+11 ]*tb[i+11 ];
1176 			cacc+=ta[i+12 ]*tb[i+12 ];
1177 			cacc+=ta[i+13 ]*tb[i+13 ];
1178 			cacc+=ta[i+14 ]*tb[i+14 ];
1179 			cacc+=ta[i+15 ]*tb[i+15 ];
1180 	}
1181 for(     ;i<n;++i){ 		cacc+=ta[i+0 ]*tb[i+0 ];
1182 	 }
1183 }
1184 ;
1185 		*tc = cacc;
1186 	}
1187 	else
1188 #endif
1189 	return RSB_ERR_UNSUPPORTED_TYPE	;
1190 	}
1191 	else
1192 	{
1193 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1194 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
1195 	{
1196 		const double*tb = b; const double*ta = a; double*tc = c,cacc = ((double)(0));
1197 		{
1198 for(i=0;i+15<n;i+=16){
1199 		cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1200 			cacc+=ta[inca*(i+1 )]*tb[incb*(i+1 )];
1201 			cacc+=ta[inca*(i+2 )]*tb[incb*(i+2 )];
1202 			cacc+=ta[inca*(i+3 )]*tb[incb*(i+3 )];
1203 			cacc+=ta[inca*(i+4 )]*tb[incb*(i+4 )];
1204 			cacc+=ta[inca*(i+5 )]*tb[incb*(i+5 )];
1205 			cacc+=ta[inca*(i+6 )]*tb[incb*(i+6 )];
1206 			cacc+=ta[inca*(i+7 )]*tb[incb*(i+7 )];
1207 			cacc+=ta[inca*(i+8 )]*tb[incb*(i+8 )];
1208 			cacc+=ta[inca*(i+9 )]*tb[incb*(i+9 )];
1209 			cacc+=ta[inca*(i+10 )]*tb[incb*(i+10 )];
1210 			cacc+=ta[inca*(i+11 )]*tb[incb*(i+11 )];
1211 			cacc+=ta[inca*(i+12 )]*tb[incb*(i+12 )];
1212 			cacc+=ta[inca*(i+13 )]*tb[incb*(i+13 )];
1213 			cacc+=ta[inca*(i+14 )]*tb[incb*(i+14 )];
1214 			cacc+=ta[inca*(i+15 )]*tb[incb*(i+15 )];
1215 	}
1216 for(     ;i<n;++i){ 		cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1217 	 }
1218 }
1219 ;
1220 		*tc = cacc;
1221 	}
1222 	else
1223 #endif
1224 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1225 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
1226 	{
1227 		const float*tb = b; const float*ta = a; float*tc = c,cacc = ((float)(0));
1228 		{
1229 for(i=0;i+15<n;i+=16){
1230 		cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1231 			cacc+=ta[inca*(i+1 )]*tb[incb*(i+1 )];
1232 			cacc+=ta[inca*(i+2 )]*tb[incb*(i+2 )];
1233 			cacc+=ta[inca*(i+3 )]*tb[incb*(i+3 )];
1234 			cacc+=ta[inca*(i+4 )]*tb[incb*(i+4 )];
1235 			cacc+=ta[inca*(i+5 )]*tb[incb*(i+5 )];
1236 			cacc+=ta[inca*(i+6 )]*tb[incb*(i+6 )];
1237 			cacc+=ta[inca*(i+7 )]*tb[incb*(i+7 )];
1238 			cacc+=ta[inca*(i+8 )]*tb[incb*(i+8 )];
1239 			cacc+=ta[inca*(i+9 )]*tb[incb*(i+9 )];
1240 			cacc+=ta[inca*(i+10 )]*tb[incb*(i+10 )];
1241 			cacc+=ta[inca*(i+11 )]*tb[incb*(i+11 )];
1242 			cacc+=ta[inca*(i+12 )]*tb[incb*(i+12 )];
1243 			cacc+=ta[inca*(i+13 )]*tb[incb*(i+13 )];
1244 			cacc+=ta[inca*(i+14 )]*tb[incb*(i+14 )];
1245 			cacc+=ta[inca*(i+15 )]*tb[incb*(i+15 )];
1246 	}
1247 for(     ;i<n;++i){ 		cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1248 	 }
1249 }
1250 ;
1251 		*tc = cacc;
1252 	}
1253 	else
1254 #endif
1255 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1256 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
1257 	{
1258 		const float complex*tb = b; const float complex*ta = a; float complex*tc = c,cacc = ((float complex)(0));
1259 		{
1260 for(i=0;i+15<n;i+=16){
1261 		cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1262 			cacc+=ta[inca*(i+1 )]*tb[incb*(i+1 )];
1263 			cacc+=ta[inca*(i+2 )]*tb[incb*(i+2 )];
1264 			cacc+=ta[inca*(i+3 )]*tb[incb*(i+3 )];
1265 			cacc+=ta[inca*(i+4 )]*tb[incb*(i+4 )];
1266 			cacc+=ta[inca*(i+5 )]*tb[incb*(i+5 )];
1267 			cacc+=ta[inca*(i+6 )]*tb[incb*(i+6 )];
1268 			cacc+=ta[inca*(i+7 )]*tb[incb*(i+7 )];
1269 			cacc+=ta[inca*(i+8 )]*tb[incb*(i+8 )];
1270 			cacc+=ta[inca*(i+9 )]*tb[incb*(i+9 )];
1271 			cacc+=ta[inca*(i+10 )]*tb[incb*(i+10 )];
1272 			cacc+=ta[inca*(i+11 )]*tb[incb*(i+11 )];
1273 			cacc+=ta[inca*(i+12 )]*tb[incb*(i+12 )];
1274 			cacc+=ta[inca*(i+13 )]*tb[incb*(i+13 )];
1275 			cacc+=ta[inca*(i+14 )]*tb[incb*(i+14 )];
1276 			cacc+=ta[inca*(i+15 )]*tb[incb*(i+15 )];
1277 	}
1278 for(     ;i<n;++i){ 		cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1279 	 }
1280 }
1281 ;
1282 		*tc = cacc;
1283 	}
1284 	else
1285 #endif
1286 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1287 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
1288 	{
1289 		const double complex*tb = b; const double complex*ta = a; double complex*tc = c,cacc = ((double complex)(0));
1290 		{
1291 for(i=0;i+15<n;i+=16){
1292 		cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1293 			cacc+=ta[inca*(i+1 )]*tb[incb*(i+1 )];
1294 			cacc+=ta[inca*(i+2 )]*tb[incb*(i+2 )];
1295 			cacc+=ta[inca*(i+3 )]*tb[incb*(i+3 )];
1296 			cacc+=ta[inca*(i+4 )]*tb[incb*(i+4 )];
1297 			cacc+=ta[inca*(i+5 )]*tb[incb*(i+5 )];
1298 			cacc+=ta[inca*(i+6 )]*tb[incb*(i+6 )];
1299 			cacc+=ta[inca*(i+7 )]*tb[incb*(i+7 )];
1300 			cacc+=ta[inca*(i+8 )]*tb[incb*(i+8 )];
1301 			cacc+=ta[inca*(i+9 )]*tb[incb*(i+9 )];
1302 			cacc+=ta[inca*(i+10 )]*tb[incb*(i+10 )];
1303 			cacc+=ta[inca*(i+11 )]*tb[incb*(i+11 )];
1304 			cacc+=ta[inca*(i+12 )]*tb[incb*(i+12 )];
1305 			cacc+=ta[inca*(i+13 )]*tb[incb*(i+13 )];
1306 			cacc+=ta[inca*(i+14 )]*tb[incb*(i+14 )];
1307 			cacc+=ta[inca*(i+15 )]*tb[incb*(i+15 )];
1308 	}
1309 for(     ;i<n;++i){ 		cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1310 	 }
1311 }
1312 ;
1313 		*tc = cacc;
1314 	}
1315 	else
1316 #endif
1317 	return RSB_ERR_UNSUPPORTED_TYPE	;
1318 	}
1319 	return RSB_ERR_NO_ERROR;
1320 }
1321 
rsb_fill_with_zeros_nostride(void * array,rsb_type_t type,size_t n)1322 static rsb_err_t rsb_fill_with_zeros_nostride(void * array, rsb_type_t type, size_t n)
1323 {
1324 	/*!
1325 	 * \ingroup gr_vec
1326 	 * Will zero the input n elements long array of type type.
1327 	 * \param array	an array pointer
1328 	 * \param type	a valid type code
1329 	 * \param n	the input array length
1330 	 *
1331 	 * \return \rsberrcodemsg
1332 	 * */
1333 	size_t i;
1334 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1335 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  ){
1336 	double*ta = array;
1337 {
1338 for(i=0;i+15<n;i+=16){
1339 ta[i+0 ] = ((double)(0));ta[i+1 ] = ((double)(0));ta[i+2 ] = ((double)(0));ta[i+3 ] = ((double)(0));ta[i+4 ] = ((double)(0));ta[i+5 ] = ((double)(0));ta[i+6 ] = ((double)(0));ta[i+7 ] = ((double)(0));ta[i+8 ] = ((double)(0));ta[i+9 ] = ((double)(0));ta[i+10 ] = ((double)(0));ta[i+11 ] = ((double)(0));ta[i+12 ] = ((double)(0));ta[i+13 ] = ((double)(0));ta[i+14 ] = ((double)(0));ta[i+15 ] = ((double)(0));}
1340 for(     ;i<n;++i){ ta[i+0 ] = ((double)(0)); }
1341 }
1342 }
1343 	else
1344 #endif
1345 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1346 	if( type == RSB_NUMERICAL_TYPE_FLOAT  ){
1347 	float*ta = array;
1348 {
1349 for(i=0;i+15<n;i+=16){
1350 ta[i+0 ] = ((float)(0));ta[i+1 ] = ((float)(0));ta[i+2 ] = ((float)(0));ta[i+3 ] = ((float)(0));ta[i+4 ] = ((float)(0));ta[i+5 ] = ((float)(0));ta[i+6 ] = ((float)(0));ta[i+7 ] = ((float)(0));ta[i+8 ] = ((float)(0));ta[i+9 ] = ((float)(0));ta[i+10 ] = ((float)(0));ta[i+11 ] = ((float)(0));ta[i+12 ] = ((float)(0));ta[i+13 ] = ((float)(0));ta[i+14 ] = ((float)(0));ta[i+15 ] = ((float)(0));}
1351 for(     ;i<n;++i){ ta[i+0 ] = ((float)(0)); }
1352 }
1353 }
1354 	else
1355 #endif
1356 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1357 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  ){
1358 	float complex*ta = array;
1359 {
1360 for(i=0;i+15<n;i+=16){
1361 ta[i+0 ] = ((float complex)(0));ta[i+1 ] = ((float complex)(0));ta[i+2 ] = ((float complex)(0));ta[i+3 ] = ((float complex)(0));ta[i+4 ] = ((float complex)(0));ta[i+5 ] = ((float complex)(0));ta[i+6 ] = ((float complex)(0));ta[i+7 ] = ((float complex)(0));ta[i+8 ] = ((float complex)(0));ta[i+9 ] = ((float complex)(0));ta[i+10 ] = ((float complex)(0));ta[i+11 ] = ((float complex)(0));ta[i+12 ] = ((float complex)(0));ta[i+13 ] = ((float complex)(0));ta[i+14 ] = ((float complex)(0));ta[i+15 ] = ((float complex)(0));}
1362 for(     ;i<n;++i){ ta[i+0 ] = ((float complex)(0)); }
1363 }
1364 }
1365 	else
1366 #endif
1367 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1368 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  ){
1369 	double complex*ta = array;
1370 {
1371 for(i=0;i+15<n;i+=16){
1372 ta[i+0 ] = ((double complex)(0));ta[i+1 ] = ((double complex)(0));ta[i+2 ] = ((double complex)(0));ta[i+3 ] = ((double complex)(0));ta[i+4 ] = ((double complex)(0));ta[i+5 ] = ((double complex)(0));ta[i+6 ] = ((double complex)(0));ta[i+7 ] = ((double complex)(0));ta[i+8 ] = ((double complex)(0));ta[i+9 ] = ((double complex)(0));ta[i+10 ] = ((double complex)(0));ta[i+11 ] = ((double complex)(0));ta[i+12 ] = ((double complex)(0));ta[i+13 ] = ((double complex)(0));ta[i+14 ] = ((double complex)(0));ta[i+15 ] = ((double complex)(0));}
1373 for(     ;i<n;++i){ ta[i+0 ] = ((double complex)(0)); }
1374 }
1375 }
1376 	else
1377 #endif
1378 		return RSB_ERR_UNSUPPORTED_TYPE	;
1379 	return RSB_ERR_NO_ERROR;
1380 }
1381 
rsb_fill_with_zeros(void * array,rsb_type_t type,size_t n,size_t incx)1382 static rsb_err_t rsb_fill_with_zeros(void * array, rsb_type_t type, size_t n, size_t incx)
1383 {
1384 	/*!
1385 	 * \ingroup gr_vec
1386 	 * Will zero the input n elements long array of type type.
1387 	 * \param array	an array pointer
1388 	 * \param type	a valid type code
1389 	 * \param n	the input array length
1390 	 *
1391 	 * \return \rsberrcodemsg
1392 	 * */
1393 	size_t i;
1394 	if(incx==1)
1395 		return rsb_fill_with_zeros_nostride(array,type,n);
1396 
1397 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1398 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  ){
1399 	double*ta = array;
1400 {
1401 for(i=0;i+15<n;i+=16){
1402 ta[(i+0 )*incx] = ((double)(0));ta[(i+1 )*incx] = ((double)(0));ta[(i+2 )*incx] = ((double)(0));ta[(i+3 )*incx] = ((double)(0));ta[(i+4 )*incx] = ((double)(0));ta[(i+5 )*incx] = ((double)(0));ta[(i+6 )*incx] = ((double)(0));ta[(i+7 )*incx] = ((double)(0));ta[(i+8 )*incx] = ((double)(0));ta[(i+9 )*incx] = ((double)(0));ta[(i+10 )*incx] = ((double)(0));ta[(i+11 )*incx] = ((double)(0));ta[(i+12 )*incx] = ((double)(0));ta[(i+13 )*incx] = ((double)(0));ta[(i+14 )*incx] = ((double)(0));ta[(i+15 )*incx] = ((double)(0));}
1403 for(     ;i<n;++i){ ta[(i+0 )*incx] = ((double)(0)); }
1404 }
1405 }
1406 	else
1407 #endif
1408 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1409 	if( type == RSB_NUMERICAL_TYPE_FLOAT  ){
1410 	float*ta = array;
1411 {
1412 for(i=0;i+15<n;i+=16){
1413 ta[(i+0 )*incx] = ((float)(0));ta[(i+1 )*incx] = ((float)(0));ta[(i+2 )*incx] = ((float)(0));ta[(i+3 )*incx] = ((float)(0));ta[(i+4 )*incx] = ((float)(0));ta[(i+5 )*incx] = ((float)(0));ta[(i+6 )*incx] = ((float)(0));ta[(i+7 )*incx] = ((float)(0));ta[(i+8 )*incx] = ((float)(0));ta[(i+9 )*incx] = ((float)(0));ta[(i+10 )*incx] = ((float)(0));ta[(i+11 )*incx] = ((float)(0));ta[(i+12 )*incx] = ((float)(0));ta[(i+13 )*incx] = ((float)(0));ta[(i+14 )*incx] = ((float)(0));ta[(i+15 )*incx] = ((float)(0));}
1414 for(     ;i<n;++i){ ta[(i+0 )*incx] = ((float)(0)); }
1415 }
1416 }
1417 	else
1418 #endif
1419 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1420 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  ){
1421 	float complex*ta = array;
1422 {
1423 for(i=0;i+15<n;i+=16){
1424 ta[(i+0 )*incx] = ((float complex)(0));ta[(i+1 )*incx] = ((float complex)(0));ta[(i+2 )*incx] = ((float complex)(0));ta[(i+3 )*incx] = ((float complex)(0));ta[(i+4 )*incx] = ((float complex)(0));ta[(i+5 )*incx] = ((float complex)(0));ta[(i+6 )*incx] = ((float complex)(0));ta[(i+7 )*incx] = ((float complex)(0));ta[(i+8 )*incx] = ((float complex)(0));ta[(i+9 )*incx] = ((float complex)(0));ta[(i+10 )*incx] = ((float complex)(0));ta[(i+11 )*incx] = ((float complex)(0));ta[(i+12 )*incx] = ((float complex)(0));ta[(i+13 )*incx] = ((float complex)(0));ta[(i+14 )*incx] = ((float complex)(0));ta[(i+15 )*incx] = ((float complex)(0));}
1425 for(     ;i<n;++i){ ta[(i+0 )*incx] = ((float complex)(0)); }
1426 }
1427 }
1428 	else
1429 #endif
1430 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1431 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  ){
1432 	double complex*ta = array;
1433 {
1434 for(i=0;i+15<n;i+=16){
1435 ta[(i+0 )*incx] = ((double complex)(0));ta[(i+1 )*incx] = ((double complex)(0));ta[(i+2 )*incx] = ((double complex)(0));ta[(i+3 )*incx] = ((double complex)(0));ta[(i+4 )*incx] = ((double complex)(0));ta[(i+5 )*incx] = ((double complex)(0));ta[(i+6 )*incx] = ((double complex)(0));ta[(i+7 )*incx] = ((double complex)(0));ta[(i+8 )*incx] = ((double complex)(0));ta[(i+9 )*incx] = ((double complex)(0));ta[(i+10 )*incx] = ((double complex)(0));ta[(i+11 )*incx] = ((double complex)(0));ta[(i+12 )*incx] = ((double complex)(0));ta[(i+13 )*incx] = ((double complex)(0));ta[(i+14 )*incx] = ((double complex)(0));ta[(i+15 )*incx] = ((double complex)(0));}
1436 for(     ;i<n;++i){ ta[(i+0 )*incx] = ((double complex)(0)); }
1437 }
1438 }
1439 	else
1440 #endif
1441 		return RSB_ERR_UNSUPPORTED_TYPE	;
1442 	return RSB_ERR_NO_ERROR;
1443 }
1444 
rsb_vector_scale(void * a,const void * alphap,rsb_type_t type,size_t n)1445 static rsb_err_t rsb_vector_scale(void * a, const void * alphap, rsb_type_t type, size_t n)
1446 {
1447 	/*!
1448 	 * a <- a * alpha
1449          *
1450 	 * \param array	an array pointer
1451 	 * \param type	a valid type code
1452 	 * \param alphap scaling value (if NULL assumed to be zero)
1453 	 * \param n	the input array length
1454 	 * \note see dscal in BLAS
1455 	 *
1456 	 * \return \rsberrcodemsg
1457 	 * */
1458 	size_t i;
1459 	if(alphap==NULL || RSB_IS_ELEMENT_ZERO(alphap,type))
1460 		return rsb_fill_with_zeros(a,type,n,1);
1461 
1462 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1463 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
1464 	{
1465 	const double alpha = *(double*)alphap; double*ta = a;
1466 	{
1467 for(i=0;i+15<n;i+=16){
1468 	ta[i+0 ]*=alpha;
1469 		ta[i+1 ]*=alpha;
1470 		ta[i+2 ]*=alpha;
1471 		ta[i+3 ]*=alpha;
1472 		ta[i+4 ]*=alpha;
1473 		ta[i+5 ]*=alpha;
1474 		ta[i+6 ]*=alpha;
1475 		ta[i+7 ]*=alpha;
1476 		ta[i+8 ]*=alpha;
1477 		ta[i+9 ]*=alpha;
1478 		ta[i+10 ]*=alpha;
1479 		ta[i+11 ]*=alpha;
1480 		ta[i+12 ]*=alpha;
1481 		ta[i+13 ]*=alpha;
1482 		ta[i+14 ]*=alpha;
1483 		ta[i+15 ]*=alpha;
1484 	}
1485 for(     ;i<n;++i){ 	ta[i+0 ]*=alpha;
1486 	 }
1487 }
1488 ;
1489 	}
1490 	else
1491 #endif
1492 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1493 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
1494 	{
1495 	const float alpha = *(float*)alphap; float*ta = a;
1496 	{
1497 for(i=0;i+15<n;i+=16){
1498 	ta[i+0 ]*=alpha;
1499 		ta[i+1 ]*=alpha;
1500 		ta[i+2 ]*=alpha;
1501 		ta[i+3 ]*=alpha;
1502 		ta[i+4 ]*=alpha;
1503 		ta[i+5 ]*=alpha;
1504 		ta[i+6 ]*=alpha;
1505 		ta[i+7 ]*=alpha;
1506 		ta[i+8 ]*=alpha;
1507 		ta[i+9 ]*=alpha;
1508 		ta[i+10 ]*=alpha;
1509 		ta[i+11 ]*=alpha;
1510 		ta[i+12 ]*=alpha;
1511 		ta[i+13 ]*=alpha;
1512 		ta[i+14 ]*=alpha;
1513 		ta[i+15 ]*=alpha;
1514 	}
1515 for(     ;i<n;++i){ 	ta[i+0 ]*=alpha;
1516 	 }
1517 }
1518 ;
1519 	}
1520 	else
1521 #endif
1522 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1523 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
1524 	{
1525 	const float complex alpha = *(float complex*)alphap; float complex*ta = a;
1526 	{
1527 for(i=0;i+15<n;i+=16){
1528 	ta[i+0 ]*=alpha;
1529 		ta[i+1 ]*=alpha;
1530 		ta[i+2 ]*=alpha;
1531 		ta[i+3 ]*=alpha;
1532 		ta[i+4 ]*=alpha;
1533 		ta[i+5 ]*=alpha;
1534 		ta[i+6 ]*=alpha;
1535 		ta[i+7 ]*=alpha;
1536 		ta[i+8 ]*=alpha;
1537 		ta[i+9 ]*=alpha;
1538 		ta[i+10 ]*=alpha;
1539 		ta[i+11 ]*=alpha;
1540 		ta[i+12 ]*=alpha;
1541 		ta[i+13 ]*=alpha;
1542 		ta[i+14 ]*=alpha;
1543 		ta[i+15 ]*=alpha;
1544 	}
1545 for(     ;i<n;++i){ 	ta[i+0 ]*=alpha;
1546 	 }
1547 }
1548 ;
1549 	}
1550 	else
1551 #endif
1552 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1553 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
1554 	{
1555 	const double complex alpha = *(double complex*)alphap; double complex*ta = a;
1556 	{
1557 for(i=0;i+15<n;i+=16){
1558 	ta[i+0 ]*=alpha;
1559 		ta[i+1 ]*=alpha;
1560 		ta[i+2 ]*=alpha;
1561 		ta[i+3 ]*=alpha;
1562 		ta[i+4 ]*=alpha;
1563 		ta[i+5 ]*=alpha;
1564 		ta[i+6 ]*=alpha;
1565 		ta[i+7 ]*=alpha;
1566 		ta[i+8 ]*=alpha;
1567 		ta[i+9 ]*=alpha;
1568 		ta[i+10 ]*=alpha;
1569 		ta[i+11 ]*=alpha;
1570 		ta[i+12 ]*=alpha;
1571 		ta[i+13 ]*=alpha;
1572 		ta[i+14 ]*=alpha;
1573 		ta[i+15 ]*=alpha;
1574 	}
1575 for(     ;i<n;++i){ 	ta[i+0 ]*=alpha;
1576 	 }
1577 }
1578 ;
1579 	}
1580 	else
1581 #endif
1582 	return RSB_ERR_UNSUPPORTED_TYPE	;
1583 	return RSB_ERR_NO_ERROR;
1584 }
1585 
rsb_strided_vector_scale(void * a,const void * alphap,rsb_type_t type,size_t n,size_t stride)1586 static rsb_err_t rsb_strided_vector_scale(void * a, const void * alphap, rsb_type_t type, size_t n, size_t stride)
1587 {
1588 	/*!
1589 	 * a <- a * alpha
1590          *
1591 	 * \param array	an array pointer
1592 	 * \param type	a valid type code
1593 	 * \param n	the input array length
1594 	 * \note see dscal in BLAS
1595 	 *
1596 	 * \return \rsberrcodemsg
1597 	 * */
1598 	size_t i;
1599 	if(stride==1)
1600 		return rsb_vector_scale(a,alphap,type,n);
1601 	if(alphap==NULL || RSB_IS_ELEMENT_ZERO(alphap,type))
1602 		return rsb_fill_with_zeros(a,type,n,stride);
1603 
1604 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1605 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
1606 	{
1607 		const double alpha = *(double*)alphap; double*ta = a;
1608 		{
1609 for(i=0;i+15<n;i+=16){
1610 		ta[stride*(i+0 )]*=alpha;
1611 			ta[stride*(i+1 )]*=alpha;
1612 			ta[stride*(i+2 )]*=alpha;
1613 			ta[stride*(i+3 )]*=alpha;
1614 			ta[stride*(i+4 )]*=alpha;
1615 			ta[stride*(i+5 )]*=alpha;
1616 			ta[stride*(i+6 )]*=alpha;
1617 			ta[stride*(i+7 )]*=alpha;
1618 			ta[stride*(i+8 )]*=alpha;
1619 			ta[stride*(i+9 )]*=alpha;
1620 			ta[stride*(i+10 )]*=alpha;
1621 			ta[stride*(i+11 )]*=alpha;
1622 			ta[stride*(i+12 )]*=alpha;
1623 			ta[stride*(i+13 )]*=alpha;
1624 			ta[stride*(i+14 )]*=alpha;
1625 			ta[stride*(i+15 )]*=alpha;
1626 	}
1627 for(     ;i<n;++i){ 		ta[stride*(i+0 )]*=alpha;
1628 	 }
1629 }
1630 ;
1631 	}
1632 	else
1633 #endif
1634 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1635 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
1636 	{
1637 		const float alpha = *(float*)alphap; float*ta = a;
1638 		{
1639 for(i=0;i+15<n;i+=16){
1640 		ta[stride*(i+0 )]*=alpha;
1641 			ta[stride*(i+1 )]*=alpha;
1642 			ta[stride*(i+2 )]*=alpha;
1643 			ta[stride*(i+3 )]*=alpha;
1644 			ta[stride*(i+4 )]*=alpha;
1645 			ta[stride*(i+5 )]*=alpha;
1646 			ta[stride*(i+6 )]*=alpha;
1647 			ta[stride*(i+7 )]*=alpha;
1648 			ta[stride*(i+8 )]*=alpha;
1649 			ta[stride*(i+9 )]*=alpha;
1650 			ta[stride*(i+10 )]*=alpha;
1651 			ta[stride*(i+11 )]*=alpha;
1652 			ta[stride*(i+12 )]*=alpha;
1653 			ta[stride*(i+13 )]*=alpha;
1654 			ta[stride*(i+14 )]*=alpha;
1655 			ta[stride*(i+15 )]*=alpha;
1656 	}
1657 for(     ;i<n;++i){ 		ta[stride*(i+0 )]*=alpha;
1658 	 }
1659 }
1660 ;
1661 	}
1662 	else
1663 #endif
1664 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1665 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
1666 	{
1667 		const float complex alpha = *(float complex*)alphap; float complex*ta = a;
1668 		{
1669 for(i=0;i+15<n;i+=16){
1670 		ta[stride*(i+0 )]*=alpha;
1671 			ta[stride*(i+1 )]*=alpha;
1672 			ta[stride*(i+2 )]*=alpha;
1673 			ta[stride*(i+3 )]*=alpha;
1674 			ta[stride*(i+4 )]*=alpha;
1675 			ta[stride*(i+5 )]*=alpha;
1676 			ta[stride*(i+6 )]*=alpha;
1677 			ta[stride*(i+7 )]*=alpha;
1678 			ta[stride*(i+8 )]*=alpha;
1679 			ta[stride*(i+9 )]*=alpha;
1680 			ta[stride*(i+10 )]*=alpha;
1681 			ta[stride*(i+11 )]*=alpha;
1682 			ta[stride*(i+12 )]*=alpha;
1683 			ta[stride*(i+13 )]*=alpha;
1684 			ta[stride*(i+14 )]*=alpha;
1685 			ta[stride*(i+15 )]*=alpha;
1686 	}
1687 for(     ;i<n;++i){ 		ta[stride*(i+0 )]*=alpha;
1688 	 }
1689 }
1690 ;
1691 	}
1692 	else
1693 #endif
1694 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1695 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
1696 	{
1697 		const double complex alpha = *(double complex*)alphap; double complex*ta = a;
1698 		{
1699 for(i=0;i+15<n;i+=16){
1700 		ta[stride*(i+0 )]*=alpha;
1701 			ta[stride*(i+1 )]*=alpha;
1702 			ta[stride*(i+2 )]*=alpha;
1703 			ta[stride*(i+3 )]*=alpha;
1704 			ta[stride*(i+4 )]*=alpha;
1705 			ta[stride*(i+5 )]*=alpha;
1706 			ta[stride*(i+6 )]*=alpha;
1707 			ta[stride*(i+7 )]*=alpha;
1708 			ta[stride*(i+8 )]*=alpha;
1709 			ta[stride*(i+9 )]*=alpha;
1710 			ta[stride*(i+10 )]*=alpha;
1711 			ta[stride*(i+11 )]*=alpha;
1712 			ta[stride*(i+12 )]*=alpha;
1713 			ta[stride*(i+13 )]*=alpha;
1714 			ta[stride*(i+14 )]*=alpha;
1715 			ta[stride*(i+15 )]*=alpha;
1716 	}
1717 for(     ;i<n;++i){ 		ta[stride*(i+0 )]*=alpha;
1718 	 }
1719 }
1720 ;
1721 	}
1722 	else
1723 #endif
1724 	return RSB_ERR_UNSUPPORTED_TYPE	;
1725 	return RSB_ERR_NO_ERROR;
1726 }
1727 
rsb__util_vector_add(void * a,const void * alphap,rsb_type_t type,size_t n)1728 rsb_err_t rsb__util_vector_add(void * a, const void * alphap, rsb_type_t type, size_t n)
1729 {
1730 	/*!
1731 	 * a <- a + alpha
1732 	 *
1733 	 * \return \rsberrcodemsg
1734 	 * */
1735 	size_t i;
1736 
1737 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1738 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
1739 	{
1740 		const double alpha = *(double*)alphap; double*ta = a;
1741 		{
1742 for(i=0;i+15<n;i+=16){
1743 		ta[i+0 ]+=alpha;
1744 			ta[i+1 ]+=alpha;
1745 			ta[i+2 ]+=alpha;
1746 			ta[i+3 ]+=alpha;
1747 			ta[i+4 ]+=alpha;
1748 			ta[i+5 ]+=alpha;
1749 			ta[i+6 ]+=alpha;
1750 			ta[i+7 ]+=alpha;
1751 			ta[i+8 ]+=alpha;
1752 			ta[i+9 ]+=alpha;
1753 			ta[i+10 ]+=alpha;
1754 			ta[i+11 ]+=alpha;
1755 			ta[i+12 ]+=alpha;
1756 			ta[i+13 ]+=alpha;
1757 			ta[i+14 ]+=alpha;
1758 			ta[i+15 ]+=alpha;
1759 	}
1760 for(     ;i<n;++i){ 		ta[i+0 ]+=alpha;
1761 	 }
1762 }
1763 ;
1764 	}
1765 	else
1766 #endif
1767 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1768 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
1769 	{
1770 		const float alpha = *(float*)alphap; float*ta = a;
1771 		{
1772 for(i=0;i+15<n;i+=16){
1773 		ta[i+0 ]+=alpha;
1774 			ta[i+1 ]+=alpha;
1775 			ta[i+2 ]+=alpha;
1776 			ta[i+3 ]+=alpha;
1777 			ta[i+4 ]+=alpha;
1778 			ta[i+5 ]+=alpha;
1779 			ta[i+6 ]+=alpha;
1780 			ta[i+7 ]+=alpha;
1781 			ta[i+8 ]+=alpha;
1782 			ta[i+9 ]+=alpha;
1783 			ta[i+10 ]+=alpha;
1784 			ta[i+11 ]+=alpha;
1785 			ta[i+12 ]+=alpha;
1786 			ta[i+13 ]+=alpha;
1787 			ta[i+14 ]+=alpha;
1788 			ta[i+15 ]+=alpha;
1789 	}
1790 for(     ;i<n;++i){ 		ta[i+0 ]+=alpha;
1791 	 }
1792 }
1793 ;
1794 	}
1795 	else
1796 #endif
1797 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1798 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
1799 	{
1800 		const float complex alpha = *(float complex*)alphap; float complex*ta = a;
1801 		{
1802 for(i=0;i+15<n;i+=16){
1803 		ta[i+0 ]+=alpha;
1804 			ta[i+1 ]+=alpha;
1805 			ta[i+2 ]+=alpha;
1806 			ta[i+3 ]+=alpha;
1807 			ta[i+4 ]+=alpha;
1808 			ta[i+5 ]+=alpha;
1809 			ta[i+6 ]+=alpha;
1810 			ta[i+7 ]+=alpha;
1811 			ta[i+8 ]+=alpha;
1812 			ta[i+9 ]+=alpha;
1813 			ta[i+10 ]+=alpha;
1814 			ta[i+11 ]+=alpha;
1815 			ta[i+12 ]+=alpha;
1816 			ta[i+13 ]+=alpha;
1817 			ta[i+14 ]+=alpha;
1818 			ta[i+15 ]+=alpha;
1819 	}
1820 for(     ;i<n;++i){ 		ta[i+0 ]+=alpha;
1821 	 }
1822 }
1823 ;
1824 	}
1825 	else
1826 #endif
1827 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1828 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
1829 	{
1830 		const double complex alpha = *(double complex*)alphap; double complex*ta = a;
1831 		{
1832 for(i=0;i+15<n;i+=16){
1833 		ta[i+0 ]+=alpha;
1834 			ta[i+1 ]+=alpha;
1835 			ta[i+2 ]+=alpha;
1836 			ta[i+3 ]+=alpha;
1837 			ta[i+4 ]+=alpha;
1838 			ta[i+5 ]+=alpha;
1839 			ta[i+6 ]+=alpha;
1840 			ta[i+7 ]+=alpha;
1841 			ta[i+8 ]+=alpha;
1842 			ta[i+9 ]+=alpha;
1843 			ta[i+10 ]+=alpha;
1844 			ta[i+11 ]+=alpha;
1845 			ta[i+12 ]+=alpha;
1846 			ta[i+13 ]+=alpha;
1847 			ta[i+14 ]+=alpha;
1848 			ta[i+15 ]+=alpha;
1849 	}
1850 for(     ;i<n;++i){ 		ta[i+0 ]+=alpha;
1851 	 }
1852 }
1853 ;
1854 	}
1855 	else
1856 #endif
1857 	return RSB_ERR_UNSUPPORTED_TYPE	;
1858 	return RSB_ERR_NO_ERROR;
1859 }
1860 
rsb__util_vector_div(void * a,const void * alphap,rsb_type_t type,size_t n)1861 rsb_err_t rsb__util_vector_div(void * a, const void * alphap, rsb_type_t type, size_t n)
1862 {
1863 	/*!
1864 	 * this is a benchmark-oriented function only..
1865 	 * \return \rsberrcodemsg
1866 	 * */
1867 	size_t i;
1868 
1869 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1870 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
1871 	{
1872 	const double alpha = *(double*)alphap; double*ta = a;
1873 	{
1874 for(i=0;i+15<n;i+=16){
1875 	ta[i+0 ]/=alpha;
1876 		ta[i+1 ]/=alpha;
1877 		ta[i+2 ]/=alpha;
1878 		ta[i+3 ]/=alpha;
1879 		ta[i+4 ]/=alpha;
1880 		ta[i+5 ]/=alpha;
1881 		ta[i+6 ]/=alpha;
1882 		ta[i+7 ]/=alpha;
1883 		ta[i+8 ]/=alpha;
1884 		ta[i+9 ]/=alpha;
1885 		ta[i+10 ]/=alpha;
1886 		ta[i+11 ]/=alpha;
1887 		ta[i+12 ]/=alpha;
1888 		ta[i+13 ]/=alpha;
1889 		ta[i+14 ]/=alpha;
1890 		ta[i+15 ]/=alpha;
1891 	}
1892 for(     ;i<n;++i){ 	ta[i+0 ]/=alpha;
1893 	 }
1894 }
1895 ;
1896 	}
1897 	else
1898 #endif
1899 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1900 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
1901 	{
1902 	const float alpha = *(float*)alphap; float*ta = a;
1903 	{
1904 for(i=0;i+15<n;i+=16){
1905 	ta[i+0 ]/=alpha;
1906 		ta[i+1 ]/=alpha;
1907 		ta[i+2 ]/=alpha;
1908 		ta[i+3 ]/=alpha;
1909 		ta[i+4 ]/=alpha;
1910 		ta[i+5 ]/=alpha;
1911 		ta[i+6 ]/=alpha;
1912 		ta[i+7 ]/=alpha;
1913 		ta[i+8 ]/=alpha;
1914 		ta[i+9 ]/=alpha;
1915 		ta[i+10 ]/=alpha;
1916 		ta[i+11 ]/=alpha;
1917 		ta[i+12 ]/=alpha;
1918 		ta[i+13 ]/=alpha;
1919 		ta[i+14 ]/=alpha;
1920 		ta[i+15 ]/=alpha;
1921 	}
1922 for(     ;i<n;++i){ 	ta[i+0 ]/=alpha;
1923 	 }
1924 }
1925 ;
1926 	}
1927 	else
1928 #endif
1929 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1930 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
1931 	{
1932 	const float complex alpha = *(float complex*)alphap; float complex*ta = a;
1933 	{
1934 for(i=0;i+15<n;i+=16){
1935 	ta[i+0 ]/=alpha;
1936 		ta[i+1 ]/=alpha;
1937 		ta[i+2 ]/=alpha;
1938 		ta[i+3 ]/=alpha;
1939 		ta[i+4 ]/=alpha;
1940 		ta[i+5 ]/=alpha;
1941 		ta[i+6 ]/=alpha;
1942 		ta[i+7 ]/=alpha;
1943 		ta[i+8 ]/=alpha;
1944 		ta[i+9 ]/=alpha;
1945 		ta[i+10 ]/=alpha;
1946 		ta[i+11 ]/=alpha;
1947 		ta[i+12 ]/=alpha;
1948 		ta[i+13 ]/=alpha;
1949 		ta[i+14 ]/=alpha;
1950 		ta[i+15 ]/=alpha;
1951 	}
1952 for(     ;i<n;++i){ 	ta[i+0 ]/=alpha;
1953 	 }
1954 }
1955 ;
1956 	}
1957 	else
1958 #endif
1959 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1960 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
1961 	{
1962 	const double complex alpha = *(double complex*)alphap; double complex*ta = a;
1963 	{
1964 for(i=0;i+15<n;i+=16){
1965 	ta[i+0 ]/=alpha;
1966 		ta[i+1 ]/=alpha;
1967 		ta[i+2 ]/=alpha;
1968 		ta[i+3 ]/=alpha;
1969 		ta[i+4 ]/=alpha;
1970 		ta[i+5 ]/=alpha;
1971 		ta[i+6 ]/=alpha;
1972 		ta[i+7 ]/=alpha;
1973 		ta[i+8 ]/=alpha;
1974 		ta[i+9 ]/=alpha;
1975 		ta[i+10 ]/=alpha;
1976 		ta[i+11 ]/=alpha;
1977 		ta[i+12 ]/=alpha;
1978 		ta[i+13 ]/=alpha;
1979 		ta[i+14 ]/=alpha;
1980 		ta[i+15 ]/=alpha;
1981 	}
1982 for(     ;i<n;++i){ 	ta[i+0 ]/=alpha;
1983 	 }
1984 }
1985 ;
1986 	}
1987 	else
1988 #endif
1989 	return RSB_ERR_UNSUPPORTED_TYPE	;
1990 	return RSB_ERR_NO_ERROR;
1991 }
1992 
rsb__vector_increase_by_one(void * a,rsb_type_t type,size_t n)1993 rsb_err_t rsb__vector_increase_by_one(void * a, rsb_type_t type, size_t n)
1994 {
1995 	/*!
1996 	 * \return \rsberrcodemsg
1997 	 * */
1998 	size_t i;
1999 
2000 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2001 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
2002 	{ double*ta = a;
2003 	{
2004 for(i=0;i+15<n;i+=16){
2005 	ta[i+0 ]+=((double)(1.0));
2006 		ta[i+1 ]+=((double)(1.0));
2007 		ta[i+2 ]+=((double)(1.0));
2008 		ta[i+3 ]+=((double)(1.0));
2009 		ta[i+4 ]+=((double)(1.0));
2010 		ta[i+5 ]+=((double)(1.0));
2011 		ta[i+6 ]+=((double)(1.0));
2012 		ta[i+7 ]+=((double)(1.0));
2013 		ta[i+8 ]+=((double)(1.0));
2014 		ta[i+9 ]+=((double)(1.0));
2015 		ta[i+10 ]+=((double)(1.0));
2016 		ta[i+11 ]+=((double)(1.0));
2017 		ta[i+12 ]+=((double)(1.0));
2018 		ta[i+13 ]+=((double)(1.0));
2019 		ta[i+14 ]+=((double)(1.0));
2020 		ta[i+15 ]+=((double)(1.0));
2021 	}
2022 for(     ;i<n;++i){ 	ta[i+0 ]+=((double)(1.0));
2023 	 }
2024 }
2025 ;
2026 	}
2027 	else
2028 #endif
2029 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2030 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
2031 	{ float*ta = a;
2032 	{
2033 for(i=0;i+15<n;i+=16){
2034 	ta[i+0 ]+=((float)(1.0));
2035 		ta[i+1 ]+=((float)(1.0));
2036 		ta[i+2 ]+=((float)(1.0));
2037 		ta[i+3 ]+=((float)(1.0));
2038 		ta[i+4 ]+=((float)(1.0));
2039 		ta[i+5 ]+=((float)(1.0));
2040 		ta[i+6 ]+=((float)(1.0));
2041 		ta[i+7 ]+=((float)(1.0));
2042 		ta[i+8 ]+=((float)(1.0));
2043 		ta[i+9 ]+=((float)(1.0));
2044 		ta[i+10 ]+=((float)(1.0));
2045 		ta[i+11 ]+=((float)(1.0));
2046 		ta[i+12 ]+=((float)(1.0));
2047 		ta[i+13 ]+=((float)(1.0));
2048 		ta[i+14 ]+=((float)(1.0));
2049 		ta[i+15 ]+=((float)(1.0));
2050 	}
2051 for(     ;i<n;++i){ 	ta[i+0 ]+=((float)(1.0));
2052 	 }
2053 }
2054 ;
2055 	}
2056 	else
2057 #endif
2058 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2059 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
2060 	{ float complex*ta = a;
2061 	{
2062 for(i=0;i+15<n;i+=16){
2063 	ta[i+0 ]+=((float complex)(1.0));
2064 		ta[i+1 ]+=((float complex)(1.0));
2065 		ta[i+2 ]+=((float complex)(1.0));
2066 		ta[i+3 ]+=((float complex)(1.0));
2067 		ta[i+4 ]+=((float complex)(1.0));
2068 		ta[i+5 ]+=((float complex)(1.0));
2069 		ta[i+6 ]+=((float complex)(1.0));
2070 		ta[i+7 ]+=((float complex)(1.0));
2071 		ta[i+8 ]+=((float complex)(1.0));
2072 		ta[i+9 ]+=((float complex)(1.0));
2073 		ta[i+10 ]+=((float complex)(1.0));
2074 		ta[i+11 ]+=((float complex)(1.0));
2075 		ta[i+12 ]+=((float complex)(1.0));
2076 		ta[i+13 ]+=((float complex)(1.0));
2077 		ta[i+14 ]+=((float complex)(1.0));
2078 		ta[i+15 ]+=((float complex)(1.0));
2079 	}
2080 for(     ;i<n;++i){ 	ta[i+0 ]+=((float complex)(1.0));
2081 	 }
2082 }
2083 ;
2084 	}
2085 	else
2086 #endif
2087 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2088 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
2089 	{ double complex*ta = a;
2090 	{
2091 for(i=0;i+15<n;i+=16){
2092 	ta[i+0 ]+=((double complex)(1.0));
2093 		ta[i+1 ]+=((double complex)(1.0));
2094 		ta[i+2 ]+=((double complex)(1.0));
2095 		ta[i+3 ]+=((double complex)(1.0));
2096 		ta[i+4 ]+=((double complex)(1.0));
2097 		ta[i+5 ]+=((double complex)(1.0));
2098 		ta[i+6 ]+=((double complex)(1.0));
2099 		ta[i+7 ]+=((double complex)(1.0));
2100 		ta[i+8 ]+=((double complex)(1.0));
2101 		ta[i+9 ]+=((double complex)(1.0));
2102 		ta[i+10 ]+=((double complex)(1.0));
2103 		ta[i+11 ]+=((double complex)(1.0));
2104 		ta[i+12 ]+=((double complex)(1.0));
2105 		ta[i+13 ]+=((double complex)(1.0));
2106 		ta[i+14 ]+=((double complex)(1.0));
2107 		ta[i+15 ]+=((double complex)(1.0));
2108 	}
2109 for(     ;i<n;++i){ 	ta[i+0 ]+=((double complex)(1.0));
2110 	 }
2111 }
2112 ;
2113 	}
2114 	else
2115 #endif
2116 	return RSB_ERR_UNSUPPORTED_TYPE	;
2117 	return RSB_ERR_NO_ERROR;
2118 }
2119 
rsb__util_vector_pow(void * a,rsb_type_t type,const void * y,size_t n)2120 rsb_err_t rsb__util_vector_pow(void * a, rsb_type_t type, const void *y, size_t n)
2121 {
2122 	/*!
2123 	 * \return \rsberrcodemsg
2124 	 * */
2125 	size_t i;
2126 	if(!a || !y)
2127 		goto err;
2128 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2129 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
2130 	{
2131 		double ty = *(double*)y,*ta = a;
2132 	{
2133 for(i=0;i+15<n;i+=16){
2134 	ta[i+0 ] = pow(ta[i+0 ],ty);
2135 		ta[i+1 ] = pow(ta[i+1 ],ty);
2136 		ta[i+2 ] = pow(ta[i+2 ],ty);
2137 		ta[i+3 ] = pow(ta[i+3 ],ty);
2138 		ta[i+4 ] = pow(ta[i+4 ],ty);
2139 		ta[i+5 ] = pow(ta[i+5 ],ty);
2140 		ta[i+6 ] = pow(ta[i+6 ],ty);
2141 		ta[i+7 ] = pow(ta[i+7 ],ty);
2142 		ta[i+8 ] = pow(ta[i+8 ],ty);
2143 		ta[i+9 ] = pow(ta[i+9 ],ty);
2144 		ta[i+10 ] = pow(ta[i+10 ],ty);
2145 		ta[i+11 ] = pow(ta[i+11 ],ty);
2146 		ta[i+12 ] = pow(ta[i+12 ],ty);
2147 		ta[i+13 ] = pow(ta[i+13 ],ty);
2148 		ta[i+14 ] = pow(ta[i+14 ],ty);
2149 		ta[i+15 ] = pow(ta[i+15 ],ty);
2150 	}
2151 for(     ;i<n;++i){ 	ta[i+0 ] = pow(ta[i+0 ],ty);
2152 	 }
2153 }
2154 ;
2155 	}
2156 	else
2157 #endif
2158 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2159 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
2160 	{
2161 		float ty = *(float*)y,*ta = a;
2162 	{
2163 for(i=0;i+15<n;i+=16){
2164 	ta[i+0 ] = powf(ta[i+0 ],ty);
2165 		ta[i+1 ] = powf(ta[i+1 ],ty);
2166 		ta[i+2 ] = powf(ta[i+2 ],ty);
2167 		ta[i+3 ] = powf(ta[i+3 ],ty);
2168 		ta[i+4 ] = powf(ta[i+4 ],ty);
2169 		ta[i+5 ] = powf(ta[i+5 ],ty);
2170 		ta[i+6 ] = powf(ta[i+6 ],ty);
2171 		ta[i+7 ] = powf(ta[i+7 ],ty);
2172 		ta[i+8 ] = powf(ta[i+8 ],ty);
2173 		ta[i+9 ] = powf(ta[i+9 ],ty);
2174 		ta[i+10 ] = powf(ta[i+10 ],ty);
2175 		ta[i+11 ] = powf(ta[i+11 ],ty);
2176 		ta[i+12 ] = powf(ta[i+12 ],ty);
2177 		ta[i+13 ] = powf(ta[i+13 ],ty);
2178 		ta[i+14 ] = powf(ta[i+14 ],ty);
2179 		ta[i+15 ] = powf(ta[i+15 ],ty);
2180 	}
2181 for(     ;i<n;++i){ 	ta[i+0 ] = powf(ta[i+0 ],ty);
2182 	 }
2183 }
2184 ;
2185 	}
2186 	else
2187 #endif
2188 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2189 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
2190 	{
2191 		float complex ty = *(float complex*)y,*ta = a;
2192 	{
2193 for(i=0;i+15<n;i+=16){
2194 	ta[i+0 ] = cpowf(ta[i+0 ],ty);
2195 		ta[i+1 ] = cpowf(ta[i+1 ],ty);
2196 		ta[i+2 ] = cpowf(ta[i+2 ],ty);
2197 		ta[i+3 ] = cpowf(ta[i+3 ],ty);
2198 		ta[i+4 ] = cpowf(ta[i+4 ],ty);
2199 		ta[i+5 ] = cpowf(ta[i+5 ],ty);
2200 		ta[i+6 ] = cpowf(ta[i+6 ],ty);
2201 		ta[i+7 ] = cpowf(ta[i+7 ],ty);
2202 		ta[i+8 ] = cpowf(ta[i+8 ],ty);
2203 		ta[i+9 ] = cpowf(ta[i+9 ],ty);
2204 		ta[i+10 ] = cpowf(ta[i+10 ],ty);
2205 		ta[i+11 ] = cpowf(ta[i+11 ],ty);
2206 		ta[i+12 ] = cpowf(ta[i+12 ],ty);
2207 		ta[i+13 ] = cpowf(ta[i+13 ],ty);
2208 		ta[i+14 ] = cpowf(ta[i+14 ],ty);
2209 		ta[i+15 ] = cpowf(ta[i+15 ],ty);
2210 	}
2211 for(     ;i<n;++i){ 	ta[i+0 ] = cpowf(ta[i+0 ],ty);
2212 	 }
2213 }
2214 ;
2215 	}
2216 	else
2217 #endif
2218 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2219 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
2220 	{
2221 		double complex ty = *(double complex*)y,*ta = a;
2222 	{
2223 for(i=0;i+15<n;i+=16){
2224 	ta[i+0 ] = cpow(ta[i+0 ],ty);
2225 		ta[i+1 ] = cpow(ta[i+1 ],ty);
2226 		ta[i+2 ] = cpow(ta[i+2 ],ty);
2227 		ta[i+3 ] = cpow(ta[i+3 ],ty);
2228 		ta[i+4 ] = cpow(ta[i+4 ],ty);
2229 		ta[i+5 ] = cpow(ta[i+5 ],ty);
2230 		ta[i+6 ] = cpow(ta[i+6 ],ty);
2231 		ta[i+7 ] = cpow(ta[i+7 ],ty);
2232 		ta[i+8 ] = cpow(ta[i+8 ],ty);
2233 		ta[i+9 ] = cpow(ta[i+9 ],ty);
2234 		ta[i+10 ] = cpow(ta[i+10 ],ty);
2235 		ta[i+11 ] = cpow(ta[i+11 ],ty);
2236 		ta[i+12 ] = cpow(ta[i+12 ],ty);
2237 		ta[i+13 ] = cpow(ta[i+13 ],ty);
2238 		ta[i+14 ] = cpow(ta[i+14 ],ty);
2239 		ta[i+15 ] = cpow(ta[i+15 ],ty);
2240 	}
2241 for(     ;i<n;++i){ 	ta[i+0 ] = cpow(ta[i+0 ],ty);
2242 	 }
2243 }
2244 ;
2245 	}
2246 	else
2247 #endif
2248 	return RSB_ERR_UNSUPPORTED_TYPE	;
2249 err:
2250 	return RSB_ERR_NO_ERROR;
2251 }
2252 
rsb__util_vector_sqrt(void * a,rsb_type_t type,size_t n)2253 rsb_err_t rsb__util_vector_sqrt(void * a, rsb_type_t type, size_t n)
2254 {
2255 	/*!
2256 	 * \return \rsberrcodemsg
2257 	 * */
2258 	size_t i;
2259 	if(!a)goto err;
2260 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2261 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
2262 	{double*ta = a;
2263 	{
2264 for(i=0;i+15<n;i+=16){
2265 	ta[i+0 ] = sqrt(ta[i+0 ]);
2266 		ta[i+1 ] = sqrt(ta[i+1 ]);
2267 		ta[i+2 ] = sqrt(ta[i+2 ]);
2268 		ta[i+3 ] = sqrt(ta[i+3 ]);
2269 		ta[i+4 ] = sqrt(ta[i+4 ]);
2270 		ta[i+5 ] = sqrt(ta[i+5 ]);
2271 		ta[i+6 ] = sqrt(ta[i+6 ]);
2272 		ta[i+7 ] = sqrt(ta[i+7 ]);
2273 		ta[i+8 ] = sqrt(ta[i+8 ]);
2274 		ta[i+9 ] = sqrt(ta[i+9 ]);
2275 		ta[i+10 ] = sqrt(ta[i+10 ]);
2276 		ta[i+11 ] = sqrt(ta[i+11 ]);
2277 		ta[i+12 ] = sqrt(ta[i+12 ]);
2278 		ta[i+13 ] = sqrt(ta[i+13 ]);
2279 		ta[i+14 ] = sqrt(ta[i+14 ]);
2280 		ta[i+15 ] = sqrt(ta[i+15 ]);
2281 	}
2282 for(     ;i<n;++i){ 	ta[i+0 ] = sqrt(ta[i+0 ]);
2283 	 }
2284 }
2285 ;
2286 	}
2287 	else
2288 #endif
2289 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2290 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
2291 	{float*ta = a;
2292 	{
2293 for(i=0;i+15<n;i+=16){
2294 	ta[i+0 ] = sqrtf(ta[i+0 ]);
2295 		ta[i+1 ] = sqrtf(ta[i+1 ]);
2296 		ta[i+2 ] = sqrtf(ta[i+2 ]);
2297 		ta[i+3 ] = sqrtf(ta[i+3 ]);
2298 		ta[i+4 ] = sqrtf(ta[i+4 ]);
2299 		ta[i+5 ] = sqrtf(ta[i+5 ]);
2300 		ta[i+6 ] = sqrtf(ta[i+6 ]);
2301 		ta[i+7 ] = sqrtf(ta[i+7 ]);
2302 		ta[i+8 ] = sqrtf(ta[i+8 ]);
2303 		ta[i+9 ] = sqrtf(ta[i+9 ]);
2304 		ta[i+10 ] = sqrtf(ta[i+10 ]);
2305 		ta[i+11 ] = sqrtf(ta[i+11 ]);
2306 		ta[i+12 ] = sqrtf(ta[i+12 ]);
2307 		ta[i+13 ] = sqrtf(ta[i+13 ]);
2308 		ta[i+14 ] = sqrtf(ta[i+14 ]);
2309 		ta[i+15 ] = sqrtf(ta[i+15 ]);
2310 	}
2311 for(     ;i<n;++i){ 	ta[i+0 ] = sqrtf(ta[i+0 ]);
2312 	 }
2313 }
2314 ;
2315 	}
2316 	else
2317 #endif
2318 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2319 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
2320 	{float complex*ta = a;
2321 	{
2322 for(i=0;i+15<n;i+=16){
2323 	ta[i+0 ] = csqrtf(ta[i+0 ]);
2324 		ta[i+1 ] = csqrtf(ta[i+1 ]);
2325 		ta[i+2 ] = csqrtf(ta[i+2 ]);
2326 		ta[i+3 ] = csqrtf(ta[i+3 ]);
2327 		ta[i+4 ] = csqrtf(ta[i+4 ]);
2328 		ta[i+5 ] = csqrtf(ta[i+5 ]);
2329 		ta[i+6 ] = csqrtf(ta[i+6 ]);
2330 		ta[i+7 ] = csqrtf(ta[i+7 ]);
2331 		ta[i+8 ] = csqrtf(ta[i+8 ]);
2332 		ta[i+9 ] = csqrtf(ta[i+9 ]);
2333 		ta[i+10 ] = csqrtf(ta[i+10 ]);
2334 		ta[i+11 ] = csqrtf(ta[i+11 ]);
2335 		ta[i+12 ] = csqrtf(ta[i+12 ]);
2336 		ta[i+13 ] = csqrtf(ta[i+13 ]);
2337 		ta[i+14 ] = csqrtf(ta[i+14 ]);
2338 		ta[i+15 ] = csqrtf(ta[i+15 ]);
2339 	}
2340 for(     ;i<n;++i){ 	ta[i+0 ] = csqrtf(ta[i+0 ]);
2341 	 }
2342 }
2343 ;
2344 	}
2345 	else
2346 #endif
2347 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2348 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
2349 	{double complex*ta = a;
2350 	{
2351 for(i=0;i+15<n;i+=16){
2352 	ta[i+0 ] = csqrt(ta[i+0 ]);
2353 		ta[i+1 ] = csqrt(ta[i+1 ]);
2354 		ta[i+2 ] = csqrt(ta[i+2 ]);
2355 		ta[i+3 ] = csqrt(ta[i+3 ]);
2356 		ta[i+4 ] = csqrt(ta[i+4 ]);
2357 		ta[i+5 ] = csqrt(ta[i+5 ]);
2358 		ta[i+6 ] = csqrt(ta[i+6 ]);
2359 		ta[i+7 ] = csqrt(ta[i+7 ]);
2360 		ta[i+8 ] = csqrt(ta[i+8 ]);
2361 		ta[i+9 ] = csqrt(ta[i+9 ]);
2362 		ta[i+10 ] = csqrt(ta[i+10 ]);
2363 		ta[i+11 ] = csqrt(ta[i+11 ]);
2364 		ta[i+12 ] = csqrt(ta[i+12 ]);
2365 		ta[i+13 ] = csqrt(ta[i+13 ]);
2366 		ta[i+14 ] = csqrt(ta[i+14 ]);
2367 		ta[i+15 ] = csqrt(ta[i+15 ]);
2368 	}
2369 for(     ;i<n;++i){ 	ta[i+0 ] = csqrt(ta[i+0 ]);
2370 	 }
2371 }
2372 ;
2373 	}
2374 	else
2375 #endif
2376 	return RSB_ERR_UNSUPPORTED_TYPE	;
2377 err:
2378 	return RSB_ERR_NO_ERROR;
2379 }
2380 
rsb__vector_scale_inv(void * a,const void * alphap,rsb_type_t type,size_t n)2381 rsb_err_t rsb__vector_scale_inv(void * a, const void * alphap, rsb_type_t type, size_t n)
2382 {
2383 	/*!
2384 	 * a <- 1/a * alpha
2385          *
2386 	 * \param array	an array pointer
2387 	 * \param type	a valid type code
2388 	 * \param n	the input array length
2389 	 * \note see dscal in BLAS
2390 	 *
2391 	 * \return \rsberrcodemsg
2392 	 * */
2393 	if(!alphap)
2394 		return RSB_ERR_BADARGS;
2395 
2396 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2397 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
2398 	{
2399 		double alphai = ((double)(1.0))/(*(double*)alphap);
2400 		return rsb_vector_scale(a,&alphai,type,n);
2401 	}
2402 	else
2403 #endif
2404 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2405 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
2406 	{
2407 		float alphai = ((float)(1.0))/(*(float*)alphap);
2408 		return rsb_vector_scale(a,&alphai,type,n);
2409 	}
2410 	else
2411 #endif
2412 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2413 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
2414 	{
2415 		float complex alphai = ((float complex)(1.0))/(*(float complex*)alphap);
2416 		return rsb_vector_scale(a,&alphai,type,n);
2417 	}
2418 	else
2419 #endif
2420 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2421 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
2422 	{
2423 		double complex alphai = ((double complex)(1.0))/(*(double complex*)alphap);
2424 		return rsb_vector_scale(a,&alphai,type,n);
2425 	}
2426 	else
2427 #endif
2428 	return RSB_ERR_UNSUPPORTED_TYPE	;
2429 	return RSB_ERR_NO_ERROR;
2430 }
2431 
rsb__vector_sum_of_abs_diffs(void * c,const void * a,const void * b,rsb_type_t type,size_t n)2432 rsb_err_t rsb__vector_sum_of_abs_diffs(void * c, const void * a, const void * b, rsb_type_t type, size_t n)
2433 {
2434 	/*!
2435 	 * \return \rsberrcodemsg
2436 	 * */
2437 	size_t i;
2438 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2439 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
2440 	{
2441 		const double*ap = a,*bp = b;
2442 		double ac = ((double)(0));
2443 		{
2444 for(i=0;i+15<n;i+=16){
2445 		ac += fabs(ap[i+0 ]-bp[i+0 ]);
2446 				ac += fabs(ap[i+1 ]-bp[i+1 ]);
2447 				ac += fabs(ap[i+2 ]-bp[i+2 ]);
2448 				ac += fabs(ap[i+3 ]-bp[i+3 ]);
2449 				ac += fabs(ap[i+4 ]-bp[i+4 ]);
2450 				ac += fabs(ap[i+5 ]-bp[i+5 ]);
2451 				ac += fabs(ap[i+6 ]-bp[i+6 ]);
2452 				ac += fabs(ap[i+7 ]-bp[i+7 ]);
2453 				ac += fabs(ap[i+8 ]-bp[i+8 ]);
2454 				ac += fabs(ap[i+9 ]-bp[i+9 ]);
2455 				ac += fabs(ap[i+10 ]-bp[i+10 ]);
2456 				ac += fabs(ap[i+11 ]-bp[i+11 ]);
2457 				ac += fabs(ap[i+12 ]-bp[i+12 ]);
2458 				ac += fabs(ap[i+13 ]-bp[i+13 ]);
2459 				ac += fabs(ap[i+14 ]-bp[i+14 ]);
2460 				ac += fabs(ap[i+15 ]-bp[i+15 ]);
2461 		}
2462 for(     ;i<n;++i){ 		ac += fabs(ap[i+0 ]-bp[i+0 ]);
2463 		 }
2464 }
2465 ;
2466 		*((double*)(c)) = ac;
2467 	}
2468 	else
2469 #endif
2470 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2471 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
2472 	{
2473 		const float*ap = a,*bp = b;
2474 		float ac = ((float)(0));
2475 		{
2476 for(i=0;i+15<n;i+=16){
2477 		ac += fabsf(ap[i+0 ]-bp[i+0 ]);
2478 				ac += fabsf(ap[i+1 ]-bp[i+1 ]);
2479 				ac += fabsf(ap[i+2 ]-bp[i+2 ]);
2480 				ac += fabsf(ap[i+3 ]-bp[i+3 ]);
2481 				ac += fabsf(ap[i+4 ]-bp[i+4 ]);
2482 				ac += fabsf(ap[i+5 ]-bp[i+5 ]);
2483 				ac += fabsf(ap[i+6 ]-bp[i+6 ]);
2484 				ac += fabsf(ap[i+7 ]-bp[i+7 ]);
2485 				ac += fabsf(ap[i+8 ]-bp[i+8 ]);
2486 				ac += fabsf(ap[i+9 ]-bp[i+9 ]);
2487 				ac += fabsf(ap[i+10 ]-bp[i+10 ]);
2488 				ac += fabsf(ap[i+11 ]-bp[i+11 ]);
2489 				ac += fabsf(ap[i+12 ]-bp[i+12 ]);
2490 				ac += fabsf(ap[i+13 ]-bp[i+13 ]);
2491 				ac += fabsf(ap[i+14 ]-bp[i+14 ]);
2492 				ac += fabsf(ap[i+15 ]-bp[i+15 ]);
2493 		}
2494 for(     ;i<n;++i){ 		ac += fabsf(ap[i+0 ]-bp[i+0 ]);
2495 		 }
2496 }
2497 ;
2498 		*((float*)(c)) = ac;
2499 	}
2500 	else
2501 #endif
2502 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2503 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
2504 	{
2505 		const float complex*ap = a,*bp = b;
2506 		float complex ac = ((float complex)(0));
2507 		{
2508 for(i=0;i+15<n;i+=16){
2509 		ac += cabsf(ap[i+0 ]-bp[i+0 ]);
2510 				ac += cabsf(ap[i+1 ]-bp[i+1 ]);
2511 				ac += cabsf(ap[i+2 ]-bp[i+2 ]);
2512 				ac += cabsf(ap[i+3 ]-bp[i+3 ]);
2513 				ac += cabsf(ap[i+4 ]-bp[i+4 ]);
2514 				ac += cabsf(ap[i+5 ]-bp[i+5 ]);
2515 				ac += cabsf(ap[i+6 ]-bp[i+6 ]);
2516 				ac += cabsf(ap[i+7 ]-bp[i+7 ]);
2517 				ac += cabsf(ap[i+8 ]-bp[i+8 ]);
2518 				ac += cabsf(ap[i+9 ]-bp[i+9 ]);
2519 				ac += cabsf(ap[i+10 ]-bp[i+10 ]);
2520 				ac += cabsf(ap[i+11 ]-bp[i+11 ]);
2521 				ac += cabsf(ap[i+12 ]-bp[i+12 ]);
2522 				ac += cabsf(ap[i+13 ]-bp[i+13 ]);
2523 				ac += cabsf(ap[i+14 ]-bp[i+14 ]);
2524 				ac += cabsf(ap[i+15 ]-bp[i+15 ]);
2525 		}
2526 for(     ;i<n;++i){ 		ac += cabsf(ap[i+0 ]-bp[i+0 ]);
2527 		 }
2528 }
2529 ;
2530 		*((float complex*)(c)) = ac;
2531 	}
2532 	else
2533 #endif
2534 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2535 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
2536 	{
2537 		const double complex*ap = a,*bp = b;
2538 		double complex ac = ((double complex)(0));
2539 		{
2540 for(i=0;i+15<n;i+=16){
2541 		ac += cabs(ap[i+0 ]-bp[i+0 ]);
2542 				ac += cabs(ap[i+1 ]-bp[i+1 ]);
2543 				ac += cabs(ap[i+2 ]-bp[i+2 ]);
2544 				ac += cabs(ap[i+3 ]-bp[i+3 ]);
2545 				ac += cabs(ap[i+4 ]-bp[i+4 ]);
2546 				ac += cabs(ap[i+5 ]-bp[i+5 ]);
2547 				ac += cabs(ap[i+6 ]-bp[i+6 ]);
2548 				ac += cabs(ap[i+7 ]-bp[i+7 ]);
2549 				ac += cabs(ap[i+8 ]-bp[i+8 ]);
2550 				ac += cabs(ap[i+9 ]-bp[i+9 ]);
2551 				ac += cabs(ap[i+10 ]-bp[i+10 ]);
2552 				ac += cabs(ap[i+11 ]-bp[i+11 ]);
2553 				ac += cabs(ap[i+12 ]-bp[i+12 ]);
2554 				ac += cabs(ap[i+13 ]-bp[i+13 ]);
2555 				ac += cabs(ap[i+14 ]-bp[i+14 ]);
2556 				ac += cabs(ap[i+15 ]-bp[i+15 ]);
2557 		}
2558 for(     ;i<n;++i){ 		ac += cabs(ap[i+0 ]-bp[i+0 ]);
2559 		 }
2560 }
2561 ;
2562 		*((double complex*)(c)) = ac;
2563 	}
2564 	else
2565 #endif
2566 	return RSB_ERR_UNSUPPORTED_TYPE	;
2567 	return RSB_ERR_NO_ERROR;
2568 }
2569 
rsb__vector_sum_of_abs(void * c,const void * a,rsb_type_t type,size_t n)2570 rsb_err_t rsb__vector_sum_of_abs(void * c, const void * a, rsb_type_t type, size_t n)
2571 {
2572 	/*!
2573 	 * \return \rsberrcodemsg
2574 	 * */
2575 	size_t i;
2576 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2577 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
2578 	{
2579 		const double*ap = a;
2580 		double ac = ((double)(0));
2581 		{
2582 for(i=0;i+15<n;i+=16){
2583 		ac += fabs(ap[i+0 ]);
2584 				ac += fabs(ap[i+1 ]);
2585 				ac += fabs(ap[i+2 ]);
2586 				ac += fabs(ap[i+3 ]);
2587 				ac += fabs(ap[i+4 ]);
2588 				ac += fabs(ap[i+5 ]);
2589 				ac += fabs(ap[i+6 ]);
2590 				ac += fabs(ap[i+7 ]);
2591 				ac += fabs(ap[i+8 ]);
2592 				ac += fabs(ap[i+9 ]);
2593 				ac += fabs(ap[i+10 ]);
2594 				ac += fabs(ap[i+11 ]);
2595 				ac += fabs(ap[i+12 ]);
2596 				ac += fabs(ap[i+13 ]);
2597 				ac += fabs(ap[i+14 ]);
2598 				ac += fabs(ap[i+15 ]);
2599 		}
2600 for(     ;i<n;++i){ 		ac += fabs(ap[i+0 ]);
2601 		 }
2602 }
2603 ;
2604 		*((double*)(c)) = ac;
2605 	}
2606 	else
2607 #endif
2608 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2609 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
2610 	{
2611 		const float*ap = a;
2612 		float ac = ((float)(0));
2613 		{
2614 for(i=0;i+15<n;i+=16){
2615 		ac += fabsf(ap[i+0 ]);
2616 				ac += fabsf(ap[i+1 ]);
2617 				ac += fabsf(ap[i+2 ]);
2618 				ac += fabsf(ap[i+3 ]);
2619 				ac += fabsf(ap[i+4 ]);
2620 				ac += fabsf(ap[i+5 ]);
2621 				ac += fabsf(ap[i+6 ]);
2622 				ac += fabsf(ap[i+7 ]);
2623 				ac += fabsf(ap[i+8 ]);
2624 				ac += fabsf(ap[i+9 ]);
2625 				ac += fabsf(ap[i+10 ]);
2626 				ac += fabsf(ap[i+11 ]);
2627 				ac += fabsf(ap[i+12 ]);
2628 				ac += fabsf(ap[i+13 ]);
2629 				ac += fabsf(ap[i+14 ]);
2630 				ac += fabsf(ap[i+15 ]);
2631 		}
2632 for(     ;i<n;++i){ 		ac += fabsf(ap[i+0 ]);
2633 		 }
2634 }
2635 ;
2636 		*((float*)(c)) = ac;
2637 	}
2638 	else
2639 #endif
2640 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2641 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
2642 	{
2643 		const float complex*ap = a;
2644 		float complex ac = ((float complex)(0));
2645 		{
2646 for(i=0;i+15<n;i+=16){
2647 		ac += cabsf(ap[i+0 ]);
2648 				ac += cabsf(ap[i+1 ]);
2649 				ac += cabsf(ap[i+2 ]);
2650 				ac += cabsf(ap[i+3 ]);
2651 				ac += cabsf(ap[i+4 ]);
2652 				ac += cabsf(ap[i+5 ]);
2653 				ac += cabsf(ap[i+6 ]);
2654 				ac += cabsf(ap[i+7 ]);
2655 				ac += cabsf(ap[i+8 ]);
2656 				ac += cabsf(ap[i+9 ]);
2657 				ac += cabsf(ap[i+10 ]);
2658 				ac += cabsf(ap[i+11 ]);
2659 				ac += cabsf(ap[i+12 ]);
2660 				ac += cabsf(ap[i+13 ]);
2661 				ac += cabsf(ap[i+14 ]);
2662 				ac += cabsf(ap[i+15 ]);
2663 		}
2664 for(     ;i<n;++i){ 		ac += cabsf(ap[i+0 ]);
2665 		 }
2666 }
2667 ;
2668 		*((float complex*)(c)) = ac;
2669 	}
2670 	else
2671 #endif
2672 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2673 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
2674 	{
2675 		const double complex*ap = a;
2676 		double complex ac = ((double complex)(0));
2677 		{
2678 for(i=0;i+15<n;i+=16){
2679 		ac += cabs(ap[i+0 ]);
2680 				ac += cabs(ap[i+1 ]);
2681 				ac += cabs(ap[i+2 ]);
2682 				ac += cabs(ap[i+3 ]);
2683 				ac += cabs(ap[i+4 ]);
2684 				ac += cabs(ap[i+5 ]);
2685 				ac += cabs(ap[i+6 ]);
2686 				ac += cabs(ap[i+7 ]);
2687 				ac += cabs(ap[i+8 ]);
2688 				ac += cabs(ap[i+9 ]);
2689 				ac += cabs(ap[i+10 ]);
2690 				ac += cabs(ap[i+11 ]);
2691 				ac += cabs(ap[i+12 ]);
2692 				ac += cabs(ap[i+13 ]);
2693 				ac += cabs(ap[i+14 ]);
2694 				ac += cabs(ap[i+15 ]);
2695 		}
2696 for(     ;i<n;++i){ 		ac += cabs(ap[i+0 ]);
2697 		 }
2698 }
2699 ;
2700 		*((double complex*)(c)) = ac;
2701 	}
2702 	else
2703 #endif
2704 	return RSB_ERR_UNSUPPORTED_TYPE	;
2705 	return RSB_ERR_NO_ERROR;
2706 }
2707 
rsb__vector_to_abs(void * a,rsb_type_t type,size_t n)2708 rsb_err_t rsb__vector_to_abs(void * a, rsb_type_t type, size_t n)
2709 {
2710 	/*!
2711 	 * \return \rsberrcodemsg
2712 	 * */
2713 	size_t i;
2714 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2715 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
2716 	{double*ta = a;
2717 	{
2718 for(i=0;i+15<n;i+=16){
2719 	ta[i+0 ] = fabs(ta[i+0 ]);
2720 		ta[i+1 ] = fabs(ta[i+1 ]);
2721 		ta[i+2 ] = fabs(ta[i+2 ]);
2722 		ta[i+3 ] = fabs(ta[i+3 ]);
2723 		ta[i+4 ] = fabs(ta[i+4 ]);
2724 		ta[i+5 ] = fabs(ta[i+5 ]);
2725 		ta[i+6 ] = fabs(ta[i+6 ]);
2726 		ta[i+7 ] = fabs(ta[i+7 ]);
2727 		ta[i+8 ] = fabs(ta[i+8 ]);
2728 		ta[i+9 ] = fabs(ta[i+9 ]);
2729 		ta[i+10 ] = fabs(ta[i+10 ]);
2730 		ta[i+11 ] = fabs(ta[i+11 ]);
2731 		ta[i+12 ] = fabs(ta[i+12 ]);
2732 		ta[i+13 ] = fabs(ta[i+13 ]);
2733 		ta[i+14 ] = fabs(ta[i+14 ]);
2734 		ta[i+15 ] = fabs(ta[i+15 ]);
2735 	}
2736 for(     ;i<n;++i){ 	ta[i+0 ] = fabs(ta[i+0 ]);
2737 	 }
2738 }
2739 ;
2740 	}
2741 	else
2742 #endif
2743 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2744 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
2745 	{float*ta = a;
2746 	{
2747 for(i=0;i+15<n;i+=16){
2748 	ta[i+0 ] = fabsf(ta[i+0 ]);
2749 		ta[i+1 ] = fabsf(ta[i+1 ]);
2750 		ta[i+2 ] = fabsf(ta[i+2 ]);
2751 		ta[i+3 ] = fabsf(ta[i+3 ]);
2752 		ta[i+4 ] = fabsf(ta[i+4 ]);
2753 		ta[i+5 ] = fabsf(ta[i+5 ]);
2754 		ta[i+6 ] = fabsf(ta[i+6 ]);
2755 		ta[i+7 ] = fabsf(ta[i+7 ]);
2756 		ta[i+8 ] = fabsf(ta[i+8 ]);
2757 		ta[i+9 ] = fabsf(ta[i+9 ]);
2758 		ta[i+10 ] = fabsf(ta[i+10 ]);
2759 		ta[i+11 ] = fabsf(ta[i+11 ]);
2760 		ta[i+12 ] = fabsf(ta[i+12 ]);
2761 		ta[i+13 ] = fabsf(ta[i+13 ]);
2762 		ta[i+14 ] = fabsf(ta[i+14 ]);
2763 		ta[i+15 ] = fabsf(ta[i+15 ]);
2764 	}
2765 for(     ;i<n;++i){ 	ta[i+0 ] = fabsf(ta[i+0 ]);
2766 	 }
2767 }
2768 ;
2769 	}
2770 	else
2771 #endif
2772 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2773 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
2774 	{float complex*ta = a;
2775 	{
2776 for(i=0;i+15<n;i+=16){
2777 	ta[i+0 ] = cabsf(ta[i+0 ]);
2778 		ta[i+1 ] = cabsf(ta[i+1 ]);
2779 		ta[i+2 ] = cabsf(ta[i+2 ]);
2780 		ta[i+3 ] = cabsf(ta[i+3 ]);
2781 		ta[i+4 ] = cabsf(ta[i+4 ]);
2782 		ta[i+5 ] = cabsf(ta[i+5 ]);
2783 		ta[i+6 ] = cabsf(ta[i+6 ]);
2784 		ta[i+7 ] = cabsf(ta[i+7 ]);
2785 		ta[i+8 ] = cabsf(ta[i+8 ]);
2786 		ta[i+9 ] = cabsf(ta[i+9 ]);
2787 		ta[i+10 ] = cabsf(ta[i+10 ]);
2788 		ta[i+11 ] = cabsf(ta[i+11 ]);
2789 		ta[i+12 ] = cabsf(ta[i+12 ]);
2790 		ta[i+13 ] = cabsf(ta[i+13 ]);
2791 		ta[i+14 ] = cabsf(ta[i+14 ]);
2792 		ta[i+15 ] = cabsf(ta[i+15 ]);
2793 	}
2794 for(     ;i<n;++i){ 	ta[i+0 ] = cabsf(ta[i+0 ]);
2795 	 }
2796 }
2797 ;
2798 	}
2799 	else
2800 #endif
2801 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2802 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
2803 	{double complex*ta = a;
2804 	{
2805 for(i=0;i+15<n;i+=16){
2806 	ta[i+0 ] = cabs(ta[i+0 ]);
2807 		ta[i+1 ] = cabs(ta[i+1 ]);
2808 		ta[i+2 ] = cabs(ta[i+2 ]);
2809 		ta[i+3 ] = cabs(ta[i+3 ]);
2810 		ta[i+4 ] = cabs(ta[i+4 ]);
2811 		ta[i+5 ] = cabs(ta[i+5 ]);
2812 		ta[i+6 ] = cabs(ta[i+6 ]);
2813 		ta[i+7 ] = cabs(ta[i+7 ]);
2814 		ta[i+8 ] = cabs(ta[i+8 ]);
2815 		ta[i+9 ] = cabs(ta[i+9 ]);
2816 		ta[i+10 ] = cabs(ta[i+10 ]);
2817 		ta[i+11 ] = cabs(ta[i+11 ]);
2818 		ta[i+12 ] = cabs(ta[i+12 ]);
2819 		ta[i+13 ] = cabs(ta[i+13 ]);
2820 		ta[i+14 ] = cabs(ta[i+14 ]);
2821 		ta[i+15 ] = cabs(ta[i+15 ]);
2822 	}
2823 for(     ;i<n;++i){ 	ta[i+0 ] = cabs(ta[i+0 ]);
2824 	 }
2825 }
2826 ;
2827 	}
2828 	else
2829 #endif
2830 	return RSB_ERR_UNSUPPORTED_TYPE	;
2831 	return RSB_ERR_NO_ERROR;
2832 }
2833 
rsb_alpha_sum(void * a,const void * b,const void * alphap,rsb_type_t type,size_t n)2834 static rsb_err_t rsb_alpha_sum(void * a, const void * b, const void * alphap, rsb_type_t type, size_t n)
2835 {
2836 	/*!
2837 	 * a <- a + alpha * b
2838          *
2839 	 * \param array	an array pointer
2840 	 * \param type	a valid type code
2841 	 * \param n	the input array length
2842 	 * \note see daxpy in BLAS
2843 	 *
2844 	 * \return \rsberrcodemsg
2845 	 * */
2846 	size_t i;
2847 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2848 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
2849 	{
2850 	const double alpha = alphap ? *(double*)alphap : ((double)(1.0));
2851 	double*ta = a; const double*tb = b;
2852 	{
2853 for(i=0;i+15<n;i+=16){
2854 	ta[i+0 ]+=alpha*tb[i+0 ];
2855 		ta[i+1 ]+=alpha*tb[i+1 ];
2856 		ta[i+2 ]+=alpha*tb[i+2 ];
2857 		ta[i+3 ]+=alpha*tb[i+3 ];
2858 		ta[i+4 ]+=alpha*tb[i+4 ];
2859 		ta[i+5 ]+=alpha*tb[i+5 ];
2860 		ta[i+6 ]+=alpha*tb[i+6 ];
2861 		ta[i+7 ]+=alpha*tb[i+7 ];
2862 		ta[i+8 ]+=alpha*tb[i+8 ];
2863 		ta[i+9 ]+=alpha*tb[i+9 ];
2864 		ta[i+10 ]+=alpha*tb[i+10 ];
2865 		ta[i+11 ]+=alpha*tb[i+11 ];
2866 		ta[i+12 ]+=alpha*tb[i+12 ];
2867 		ta[i+13 ]+=alpha*tb[i+13 ];
2868 		ta[i+14 ]+=alpha*tb[i+14 ];
2869 		ta[i+15 ]+=alpha*tb[i+15 ];
2870 	}
2871 for(     ;i<n;++i){ 	ta[i+0 ]+=alpha*tb[i+0 ];
2872 	 }
2873 }
2874 ;
2875 	}
2876 	else
2877 #endif
2878 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2879 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
2880 	{
2881 	const float alpha = alphap ? *(float*)alphap : ((float)(1.0));
2882 	float*ta = a; const float*tb = b;
2883 	{
2884 for(i=0;i+15<n;i+=16){
2885 	ta[i+0 ]+=alpha*tb[i+0 ];
2886 		ta[i+1 ]+=alpha*tb[i+1 ];
2887 		ta[i+2 ]+=alpha*tb[i+2 ];
2888 		ta[i+3 ]+=alpha*tb[i+3 ];
2889 		ta[i+4 ]+=alpha*tb[i+4 ];
2890 		ta[i+5 ]+=alpha*tb[i+5 ];
2891 		ta[i+6 ]+=alpha*tb[i+6 ];
2892 		ta[i+7 ]+=alpha*tb[i+7 ];
2893 		ta[i+8 ]+=alpha*tb[i+8 ];
2894 		ta[i+9 ]+=alpha*tb[i+9 ];
2895 		ta[i+10 ]+=alpha*tb[i+10 ];
2896 		ta[i+11 ]+=alpha*tb[i+11 ];
2897 		ta[i+12 ]+=alpha*tb[i+12 ];
2898 		ta[i+13 ]+=alpha*tb[i+13 ];
2899 		ta[i+14 ]+=alpha*tb[i+14 ];
2900 		ta[i+15 ]+=alpha*tb[i+15 ];
2901 	}
2902 for(     ;i<n;++i){ 	ta[i+0 ]+=alpha*tb[i+0 ];
2903 	 }
2904 }
2905 ;
2906 	}
2907 	else
2908 #endif
2909 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2910 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
2911 	{
2912 	const float complex alpha = alphap ? *(float complex*)alphap : ((float complex)(1.0));
2913 	float complex*ta = a; const float complex*tb = b;
2914 	{
2915 for(i=0;i+15<n;i+=16){
2916 	ta[i+0 ]+=alpha*tb[i+0 ];
2917 		ta[i+1 ]+=alpha*tb[i+1 ];
2918 		ta[i+2 ]+=alpha*tb[i+2 ];
2919 		ta[i+3 ]+=alpha*tb[i+3 ];
2920 		ta[i+4 ]+=alpha*tb[i+4 ];
2921 		ta[i+5 ]+=alpha*tb[i+5 ];
2922 		ta[i+6 ]+=alpha*tb[i+6 ];
2923 		ta[i+7 ]+=alpha*tb[i+7 ];
2924 		ta[i+8 ]+=alpha*tb[i+8 ];
2925 		ta[i+9 ]+=alpha*tb[i+9 ];
2926 		ta[i+10 ]+=alpha*tb[i+10 ];
2927 		ta[i+11 ]+=alpha*tb[i+11 ];
2928 		ta[i+12 ]+=alpha*tb[i+12 ];
2929 		ta[i+13 ]+=alpha*tb[i+13 ];
2930 		ta[i+14 ]+=alpha*tb[i+14 ];
2931 		ta[i+15 ]+=alpha*tb[i+15 ];
2932 	}
2933 for(     ;i<n;++i){ 	ta[i+0 ]+=alpha*tb[i+0 ];
2934 	 }
2935 }
2936 ;
2937 	}
2938 	else
2939 #endif
2940 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2941 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
2942 	{
2943 	const double complex alpha = alphap ? *(double complex*)alphap : ((double complex)(1.0));
2944 	double complex*ta = a; const double complex*tb = b;
2945 	{
2946 for(i=0;i+15<n;i+=16){
2947 	ta[i+0 ]+=alpha*tb[i+0 ];
2948 		ta[i+1 ]+=alpha*tb[i+1 ];
2949 		ta[i+2 ]+=alpha*tb[i+2 ];
2950 		ta[i+3 ]+=alpha*tb[i+3 ];
2951 		ta[i+4 ]+=alpha*tb[i+4 ];
2952 		ta[i+5 ]+=alpha*tb[i+5 ];
2953 		ta[i+6 ]+=alpha*tb[i+6 ];
2954 		ta[i+7 ]+=alpha*tb[i+7 ];
2955 		ta[i+8 ]+=alpha*tb[i+8 ];
2956 		ta[i+9 ]+=alpha*tb[i+9 ];
2957 		ta[i+10 ]+=alpha*tb[i+10 ];
2958 		ta[i+11 ]+=alpha*tb[i+11 ];
2959 		ta[i+12 ]+=alpha*tb[i+12 ];
2960 		ta[i+13 ]+=alpha*tb[i+13 ];
2961 		ta[i+14 ]+=alpha*tb[i+14 ];
2962 		ta[i+15 ]+=alpha*tb[i+15 ];
2963 	}
2964 for(     ;i<n;++i){ 	ta[i+0 ]+=alpha*tb[i+0 ];
2965 	 }
2966 }
2967 ;
2968 	}
2969 	else
2970 #endif
2971 	return RSB_ERR_UNSUPPORTED_TYPE	;
2972 	return RSB_ERR_NO_ERROR;
2973 }
2974 
2975 
rsb__util_set_array_to_converted_integer(void * p,rsb_flags_t typecode,const rsb_nnz_idx_t n,const rsb_nnz_idx_t incp,const rsb_int v)2976 rsb_err_t rsb__util_set_array_to_converted_integer(void *p, rsb_flags_t typecode, const rsb_nnz_idx_t n, const rsb_nnz_idx_t incp, const rsb_int v)
2977 {
2978 	/*!
2979 	 * */
2980 	size_t i;
2981 
2982 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2983 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
2984 	{
2985 	double*tp = p; const double tv = (double)v;
2986 	{
2987 for(i=0;i+15<n;i+=16){
2988 	tp[((i+0 )*incp)] = tv;
2989 		tp[((i+1 )*incp)] = tv;
2990 		tp[((i+2 )*incp)] = tv;
2991 		tp[((i+3 )*incp)] = tv;
2992 		tp[((i+4 )*incp)] = tv;
2993 		tp[((i+5 )*incp)] = tv;
2994 		tp[((i+6 )*incp)] = tv;
2995 		tp[((i+7 )*incp)] = tv;
2996 		tp[((i+8 )*incp)] = tv;
2997 		tp[((i+9 )*incp)] = tv;
2998 		tp[((i+10 )*incp)] = tv;
2999 		tp[((i+11 )*incp)] = tv;
3000 		tp[((i+12 )*incp)] = tv;
3001 		tp[((i+13 )*incp)] = tv;
3002 		tp[((i+14 )*incp)] = tv;
3003 		tp[((i+15 )*incp)] = tv;
3004 	}
3005 for(     ;i<n;++i){ 	tp[((i+0 )*incp)] = tv;
3006 	 }
3007 }
3008 ;
3009 	}
3010 	else
3011 #endif
3012 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3013 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
3014 	{
3015 	float*tp = p; const float tv = (float)v;
3016 	{
3017 for(i=0;i+15<n;i+=16){
3018 	tp[((i+0 )*incp)] = tv;
3019 		tp[((i+1 )*incp)] = tv;
3020 		tp[((i+2 )*incp)] = tv;
3021 		tp[((i+3 )*incp)] = tv;
3022 		tp[((i+4 )*incp)] = tv;
3023 		tp[((i+5 )*incp)] = tv;
3024 		tp[((i+6 )*incp)] = tv;
3025 		tp[((i+7 )*incp)] = tv;
3026 		tp[((i+8 )*incp)] = tv;
3027 		tp[((i+9 )*incp)] = tv;
3028 		tp[((i+10 )*incp)] = tv;
3029 		tp[((i+11 )*incp)] = tv;
3030 		tp[((i+12 )*incp)] = tv;
3031 		tp[((i+13 )*incp)] = tv;
3032 		tp[((i+14 )*incp)] = tv;
3033 		tp[((i+15 )*incp)] = tv;
3034 	}
3035 for(     ;i<n;++i){ 	tp[((i+0 )*incp)] = tv;
3036 	 }
3037 }
3038 ;
3039 	}
3040 	else
3041 #endif
3042 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3043 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
3044 	{
3045 	float complex*tp = p; const float complex tv = (float complex)v;
3046 	{
3047 for(i=0;i+15<n;i+=16){
3048 	tp[((i+0 )*incp)] = tv;
3049 		tp[((i+1 )*incp)] = tv;
3050 		tp[((i+2 )*incp)] = tv;
3051 		tp[((i+3 )*incp)] = tv;
3052 		tp[((i+4 )*incp)] = tv;
3053 		tp[((i+5 )*incp)] = tv;
3054 		tp[((i+6 )*incp)] = tv;
3055 		tp[((i+7 )*incp)] = tv;
3056 		tp[((i+8 )*incp)] = tv;
3057 		tp[((i+9 )*incp)] = tv;
3058 		tp[((i+10 )*incp)] = tv;
3059 		tp[((i+11 )*incp)] = tv;
3060 		tp[((i+12 )*incp)] = tv;
3061 		tp[((i+13 )*incp)] = tv;
3062 		tp[((i+14 )*incp)] = tv;
3063 		tp[((i+15 )*incp)] = tv;
3064 	}
3065 for(     ;i<n;++i){ 	tp[((i+0 )*incp)] = tv;
3066 	 }
3067 }
3068 ;
3069 	}
3070 	else
3071 #endif
3072 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3073 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
3074 	{
3075 	double complex*tp = p; const double complex tv = (double complex)v;
3076 	{
3077 for(i=0;i+15<n;i+=16){
3078 	tp[((i+0 )*incp)] = tv;
3079 		tp[((i+1 )*incp)] = tv;
3080 		tp[((i+2 )*incp)] = tv;
3081 		tp[((i+3 )*incp)] = tv;
3082 		tp[((i+4 )*incp)] = tv;
3083 		tp[((i+5 )*incp)] = tv;
3084 		tp[((i+6 )*incp)] = tv;
3085 		tp[((i+7 )*incp)] = tv;
3086 		tp[((i+8 )*incp)] = tv;
3087 		tp[((i+9 )*incp)] = tv;
3088 		tp[((i+10 )*incp)] = tv;
3089 		tp[((i+11 )*incp)] = tv;
3090 		tp[((i+12 )*incp)] = tv;
3091 		tp[((i+13 )*incp)] = tv;
3092 		tp[((i+14 )*incp)] = tv;
3093 		tp[((i+15 )*incp)] = tv;
3094 	}
3095 for(     ;i<n;++i){ 	tp[((i+0 )*incp)] = tv;
3096 	 }
3097 }
3098 ;
3099 	}
3100 	else
3101 #endif
3102 	return RSB_ERR_UNSUPPORTED_TYPE	;
3103 	return RSB_ERR_NO_ERROR;
3104 }
3105 
rsb__vectors_left_sum_reduce_and_zero(void * d,void * s,const rsb_type_t typecode,const size_t n,const size_t incd,const size_t off)3106 rsb_err_t rsb__vectors_left_sum_reduce_and_zero(void * d, void * s, const rsb_type_t typecode, const size_t n, const size_t incd, const size_t off)
3107 {
3108 	/*!
3109 	 * d[off:off+n-1] <- s[off:off+n-1]
3110 	 * s[off:off+n-1] <- 0
3111          *
3112 	 * \param array	an array pointer
3113 	 * \param typecode	a valid type code
3114 	 * \param incd	the stride of d
3115 	 * \param off offset in the vectors
3116 	 * \return \rsberrcodemsg
3117 	 * */
3118 	size_t i;
3119 
3120 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3121 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
3122 	{
3123 	double*td = d,*ts = s;
3124 	{
3125 for(i=0;i+15<n;i+=16){
3126 	td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3127 	ts[(off+i+0 )] = ((double)(0));
3128 		td[(off+i+1 )*incd]+=ts[(off+i+1 )];
3129 	ts[(off+i+1 )] = ((double)(0));
3130 		td[(off+i+2 )*incd]+=ts[(off+i+2 )];
3131 	ts[(off+i+2 )] = ((double)(0));
3132 		td[(off+i+3 )*incd]+=ts[(off+i+3 )];
3133 	ts[(off+i+3 )] = ((double)(0));
3134 		td[(off+i+4 )*incd]+=ts[(off+i+4 )];
3135 	ts[(off+i+4 )] = ((double)(0));
3136 		td[(off+i+5 )*incd]+=ts[(off+i+5 )];
3137 	ts[(off+i+5 )] = ((double)(0));
3138 		td[(off+i+6 )*incd]+=ts[(off+i+6 )];
3139 	ts[(off+i+6 )] = ((double)(0));
3140 		td[(off+i+7 )*incd]+=ts[(off+i+7 )];
3141 	ts[(off+i+7 )] = ((double)(0));
3142 		td[(off+i+8 )*incd]+=ts[(off+i+8 )];
3143 	ts[(off+i+8 )] = ((double)(0));
3144 		td[(off+i+9 )*incd]+=ts[(off+i+9 )];
3145 	ts[(off+i+9 )] = ((double)(0));
3146 		td[(off+i+10 )*incd]+=ts[(off+i+10 )];
3147 	ts[(off+i+10 )] = ((double)(0));
3148 		td[(off+i+11 )*incd]+=ts[(off+i+11 )];
3149 	ts[(off+i+11 )] = ((double)(0));
3150 		td[(off+i+12 )*incd]+=ts[(off+i+12 )];
3151 	ts[(off+i+12 )] = ((double)(0));
3152 		td[(off+i+13 )*incd]+=ts[(off+i+13 )];
3153 	ts[(off+i+13 )] = ((double)(0));
3154 		td[(off+i+14 )*incd]+=ts[(off+i+14 )];
3155 	ts[(off+i+14 )] = ((double)(0));
3156 		td[(off+i+15 )*incd]+=ts[(off+i+15 )];
3157 	ts[(off+i+15 )] = ((double)(0));
3158 	}
3159 for(     ;i<n;++i){ 	td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3160 	ts[(off+i+0 )] = ((double)(0));
3161 	 }
3162 }
3163 ;
3164 	}
3165 	else
3166 #endif
3167 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3168 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
3169 	{
3170 	float*td = d,*ts = s;
3171 	{
3172 for(i=0;i+15<n;i+=16){
3173 	td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3174 	ts[(off+i+0 )] = ((float)(0));
3175 		td[(off+i+1 )*incd]+=ts[(off+i+1 )];
3176 	ts[(off+i+1 )] = ((float)(0));
3177 		td[(off+i+2 )*incd]+=ts[(off+i+2 )];
3178 	ts[(off+i+2 )] = ((float)(0));
3179 		td[(off+i+3 )*incd]+=ts[(off+i+3 )];
3180 	ts[(off+i+3 )] = ((float)(0));
3181 		td[(off+i+4 )*incd]+=ts[(off+i+4 )];
3182 	ts[(off+i+4 )] = ((float)(0));
3183 		td[(off+i+5 )*incd]+=ts[(off+i+5 )];
3184 	ts[(off+i+5 )] = ((float)(0));
3185 		td[(off+i+6 )*incd]+=ts[(off+i+6 )];
3186 	ts[(off+i+6 )] = ((float)(0));
3187 		td[(off+i+7 )*incd]+=ts[(off+i+7 )];
3188 	ts[(off+i+7 )] = ((float)(0));
3189 		td[(off+i+8 )*incd]+=ts[(off+i+8 )];
3190 	ts[(off+i+8 )] = ((float)(0));
3191 		td[(off+i+9 )*incd]+=ts[(off+i+9 )];
3192 	ts[(off+i+9 )] = ((float)(0));
3193 		td[(off+i+10 )*incd]+=ts[(off+i+10 )];
3194 	ts[(off+i+10 )] = ((float)(0));
3195 		td[(off+i+11 )*incd]+=ts[(off+i+11 )];
3196 	ts[(off+i+11 )] = ((float)(0));
3197 		td[(off+i+12 )*incd]+=ts[(off+i+12 )];
3198 	ts[(off+i+12 )] = ((float)(0));
3199 		td[(off+i+13 )*incd]+=ts[(off+i+13 )];
3200 	ts[(off+i+13 )] = ((float)(0));
3201 		td[(off+i+14 )*incd]+=ts[(off+i+14 )];
3202 	ts[(off+i+14 )] = ((float)(0));
3203 		td[(off+i+15 )*incd]+=ts[(off+i+15 )];
3204 	ts[(off+i+15 )] = ((float)(0));
3205 	}
3206 for(     ;i<n;++i){ 	td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3207 	ts[(off+i+0 )] = ((float)(0));
3208 	 }
3209 }
3210 ;
3211 	}
3212 	else
3213 #endif
3214 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3215 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
3216 	{
3217 	float complex*td = d,*ts = s;
3218 	{
3219 for(i=0;i+15<n;i+=16){
3220 	td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3221 	ts[(off+i+0 )] = ((float complex)(0));
3222 		td[(off+i+1 )*incd]+=ts[(off+i+1 )];
3223 	ts[(off+i+1 )] = ((float complex)(0));
3224 		td[(off+i+2 )*incd]+=ts[(off+i+2 )];
3225 	ts[(off+i+2 )] = ((float complex)(0));
3226 		td[(off+i+3 )*incd]+=ts[(off+i+3 )];
3227 	ts[(off+i+3 )] = ((float complex)(0));
3228 		td[(off+i+4 )*incd]+=ts[(off+i+4 )];
3229 	ts[(off+i+4 )] = ((float complex)(0));
3230 		td[(off+i+5 )*incd]+=ts[(off+i+5 )];
3231 	ts[(off+i+5 )] = ((float complex)(0));
3232 		td[(off+i+6 )*incd]+=ts[(off+i+6 )];
3233 	ts[(off+i+6 )] = ((float complex)(0));
3234 		td[(off+i+7 )*incd]+=ts[(off+i+7 )];
3235 	ts[(off+i+7 )] = ((float complex)(0));
3236 		td[(off+i+8 )*incd]+=ts[(off+i+8 )];
3237 	ts[(off+i+8 )] = ((float complex)(0));
3238 		td[(off+i+9 )*incd]+=ts[(off+i+9 )];
3239 	ts[(off+i+9 )] = ((float complex)(0));
3240 		td[(off+i+10 )*incd]+=ts[(off+i+10 )];
3241 	ts[(off+i+10 )] = ((float complex)(0));
3242 		td[(off+i+11 )*incd]+=ts[(off+i+11 )];
3243 	ts[(off+i+11 )] = ((float complex)(0));
3244 		td[(off+i+12 )*incd]+=ts[(off+i+12 )];
3245 	ts[(off+i+12 )] = ((float complex)(0));
3246 		td[(off+i+13 )*incd]+=ts[(off+i+13 )];
3247 	ts[(off+i+13 )] = ((float complex)(0));
3248 		td[(off+i+14 )*incd]+=ts[(off+i+14 )];
3249 	ts[(off+i+14 )] = ((float complex)(0));
3250 		td[(off+i+15 )*incd]+=ts[(off+i+15 )];
3251 	ts[(off+i+15 )] = ((float complex)(0));
3252 	}
3253 for(     ;i<n;++i){ 	td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3254 	ts[(off+i+0 )] = ((float complex)(0));
3255 	 }
3256 }
3257 ;
3258 	}
3259 	else
3260 #endif
3261 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3262 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
3263 	{
3264 	double complex*td = d,*ts = s;
3265 	{
3266 for(i=0;i+15<n;i+=16){
3267 	td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3268 	ts[(off+i+0 )] = ((double complex)(0));
3269 		td[(off+i+1 )*incd]+=ts[(off+i+1 )];
3270 	ts[(off+i+1 )] = ((double complex)(0));
3271 		td[(off+i+2 )*incd]+=ts[(off+i+2 )];
3272 	ts[(off+i+2 )] = ((double complex)(0));
3273 		td[(off+i+3 )*incd]+=ts[(off+i+3 )];
3274 	ts[(off+i+3 )] = ((double complex)(0));
3275 		td[(off+i+4 )*incd]+=ts[(off+i+4 )];
3276 	ts[(off+i+4 )] = ((double complex)(0));
3277 		td[(off+i+5 )*incd]+=ts[(off+i+5 )];
3278 	ts[(off+i+5 )] = ((double complex)(0));
3279 		td[(off+i+6 )*incd]+=ts[(off+i+6 )];
3280 	ts[(off+i+6 )] = ((double complex)(0));
3281 		td[(off+i+7 )*incd]+=ts[(off+i+7 )];
3282 	ts[(off+i+7 )] = ((double complex)(0));
3283 		td[(off+i+8 )*incd]+=ts[(off+i+8 )];
3284 	ts[(off+i+8 )] = ((double complex)(0));
3285 		td[(off+i+9 )*incd]+=ts[(off+i+9 )];
3286 	ts[(off+i+9 )] = ((double complex)(0));
3287 		td[(off+i+10 )*incd]+=ts[(off+i+10 )];
3288 	ts[(off+i+10 )] = ((double complex)(0));
3289 		td[(off+i+11 )*incd]+=ts[(off+i+11 )];
3290 	ts[(off+i+11 )] = ((double complex)(0));
3291 		td[(off+i+12 )*incd]+=ts[(off+i+12 )];
3292 	ts[(off+i+12 )] = ((double complex)(0));
3293 		td[(off+i+13 )*incd]+=ts[(off+i+13 )];
3294 	ts[(off+i+13 )] = ((double complex)(0));
3295 		td[(off+i+14 )*incd]+=ts[(off+i+14 )];
3296 	ts[(off+i+14 )] = ((double complex)(0));
3297 		td[(off+i+15 )*incd]+=ts[(off+i+15 )];
3298 	ts[(off+i+15 )] = ((double complex)(0));
3299 	}
3300 for(     ;i<n;++i){ 	td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3301 	ts[(off+i+0 )] = ((double complex)(0));
3302 	 }
3303 }
3304 ;
3305 	}
3306 	else
3307 #endif
3308 	return RSB_ERR_UNSUPPORTED_TYPE	;
3309 	return RSB_ERR_NO_ERROR;
3310 }
3311 
3312 
rsb_alpha_sum_strided(void * a,const void * b,const void * alphap,rsb_type_t type,size_t n,int inca,int incb)3313 static rsb_err_t rsb_alpha_sum_strided(void * a, const void * b, const void * alphap, rsb_type_t type, size_t n, int inca, int incb)
3314 {
3315 	/*!
3316 	 * a <- a + alpha * b
3317          *
3318 	 * \param array	an array pointer
3319 	 * \param type	a valid type code
3320 	 * \param n	the input array length
3321 	 * \note see daxpy in BLAS
3322 	 *
3323 	 * \return \rsberrcodemsg
3324 	 * */
3325 	size_t i;
3326 	if(inca == 1 && incb == 1)
3327 		return rsb_alpha_sum(a,b,alphap,type,n);
3328 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3329 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
3330 	{
3331 	const double alpha = alphap ? *(double*)alphap : ((double)(1.0));
3332 	double*ta = a; const double*tb = b;
3333 	{
3334 for(i=0;i+15<n;i+=16){
3335 	ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3336 		ta[inca*(i+1 )]+=alpha*tb[incb*(i+1 )];
3337 		ta[inca*(i+2 )]+=alpha*tb[incb*(i+2 )];
3338 		ta[inca*(i+3 )]+=alpha*tb[incb*(i+3 )];
3339 		ta[inca*(i+4 )]+=alpha*tb[incb*(i+4 )];
3340 		ta[inca*(i+5 )]+=alpha*tb[incb*(i+5 )];
3341 		ta[inca*(i+6 )]+=alpha*tb[incb*(i+6 )];
3342 		ta[inca*(i+7 )]+=alpha*tb[incb*(i+7 )];
3343 		ta[inca*(i+8 )]+=alpha*tb[incb*(i+8 )];
3344 		ta[inca*(i+9 )]+=alpha*tb[incb*(i+9 )];
3345 		ta[inca*(i+10 )]+=alpha*tb[incb*(i+10 )];
3346 		ta[inca*(i+11 )]+=alpha*tb[incb*(i+11 )];
3347 		ta[inca*(i+12 )]+=alpha*tb[incb*(i+12 )];
3348 		ta[inca*(i+13 )]+=alpha*tb[incb*(i+13 )];
3349 		ta[inca*(i+14 )]+=alpha*tb[incb*(i+14 )];
3350 		ta[inca*(i+15 )]+=alpha*tb[incb*(i+15 )];
3351 	}
3352 for(     ;i<n;++i){ 	ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3353 	 }
3354 }
3355 ;
3356 	}
3357 	else
3358 #endif
3359 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3360 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
3361 	{
3362 	const float alpha = alphap ? *(float*)alphap : ((float)(1.0));
3363 	float*ta = a; const float*tb = b;
3364 	{
3365 for(i=0;i+15<n;i+=16){
3366 	ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3367 		ta[inca*(i+1 )]+=alpha*tb[incb*(i+1 )];
3368 		ta[inca*(i+2 )]+=alpha*tb[incb*(i+2 )];
3369 		ta[inca*(i+3 )]+=alpha*tb[incb*(i+3 )];
3370 		ta[inca*(i+4 )]+=alpha*tb[incb*(i+4 )];
3371 		ta[inca*(i+5 )]+=alpha*tb[incb*(i+5 )];
3372 		ta[inca*(i+6 )]+=alpha*tb[incb*(i+6 )];
3373 		ta[inca*(i+7 )]+=alpha*tb[incb*(i+7 )];
3374 		ta[inca*(i+8 )]+=alpha*tb[incb*(i+8 )];
3375 		ta[inca*(i+9 )]+=alpha*tb[incb*(i+9 )];
3376 		ta[inca*(i+10 )]+=alpha*tb[incb*(i+10 )];
3377 		ta[inca*(i+11 )]+=alpha*tb[incb*(i+11 )];
3378 		ta[inca*(i+12 )]+=alpha*tb[incb*(i+12 )];
3379 		ta[inca*(i+13 )]+=alpha*tb[incb*(i+13 )];
3380 		ta[inca*(i+14 )]+=alpha*tb[incb*(i+14 )];
3381 		ta[inca*(i+15 )]+=alpha*tb[incb*(i+15 )];
3382 	}
3383 for(     ;i<n;++i){ 	ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3384 	 }
3385 }
3386 ;
3387 	}
3388 	else
3389 #endif
3390 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3391 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
3392 	{
3393 	const float complex alpha = alphap ? *(float complex*)alphap : ((float complex)(1.0));
3394 	float complex*ta = a; const float complex*tb = b;
3395 	{
3396 for(i=0;i+15<n;i+=16){
3397 	ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3398 		ta[inca*(i+1 )]+=alpha*tb[incb*(i+1 )];
3399 		ta[inca*(i+2 )]+=alpha*tb[incb*(i+2 )];
3400 		ta[inca*(i+3 )]+=alpha*tb[incb*(i+3 )];
3401 		ta[inca*(i+4 )]+=alpha*tb[incb*(i+4 )];
3402 		ta[inca*(i+5 )]+=alpha*tb[incb*(i+5 )];
3403 		ta[inca*(i+6 )]+=alpha*tb[incb*(i+6 )];
3404 		ta[inca*(i+7 )]+=alpha*tb[incb*(i+7 )];
3405 		ta[inca*(i+8 )]+=alpha*tb[incb*(i+8 )];
3406 		ta[inca*(i+9 )]+=alpha*tb[incb*(i+9 )];
3407 		ta[inca*(i+10 )]+=alpha*tb[incb*(i+10 )];
3408 		ta[inca*(i+11 )]+=alpha*tb[incb*(i+11 )];
3409 		ta[inca*(i+12 )]+=alpha*tb[incb*(i+12 )];
3410 		ta[inca*(i+13 )]+=alpha*tb[incb*(i+13 )];
3411 		ta[inca*(i+14 )]+=alpha*tb[incb*(i+14 )];
3412 		ta[inca*(i+15 )]+=alpha*tb[incb*(i+15 )];
3413 	}
3414 for(     ;i<n;++i){ 	ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3415 	 }
3416 }
3417 ;
3418 	}
3419 	else
3420 #endif
3421 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3422 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
3423 	{
3424 	const double complex alpha = alphap ? *(double complex*)alphap : ((double complex)(1.0));
3425 	double complex*ta = a; const double complex*tb = b;
3426 	{
3427 for(i=0;i+15<n;i+=16){
3428 	ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3429 		ta[inca*(i+1 )]+=alpha*tb[incb*(i+1 )];
3430 		ta[inca*(i+2 )]+=alpha*tb[incb*(i+2 )];
3431 		ta[inca*(i+3 )]+=alpha*tb[incb*(i+3 )];
3432 		ta[inca*(i+4 )]+=alpha*tb[incb*(i+4 )];
3433 		ta[inca*(i+5 )]+=alpha*tb[incb*(i+5 )];
3434 		ta[inca*(i+6 )]+=alpha*tb[incb*(i+6 )];
3435 		ta[inca*(i+7 )]+=alpha*tb[incb*(i+7 )];
3436 		ta[inca*(i+8 )]+=alpha*tb[incb*(i+8 )];
3437 		ta[inca*(i+9 )]+=alpha*tb[incb*(i+9 )];
3438 		ta[inca*(i+10 )]+=alpha*tb[incb*(i+10 )];
3439 		ta[inca*(i+11 )]+=alpha*tb[incb*(i+11 )];
3440 		ta[inca*(i+12 )]+=alpha*tb[incb*(i+12 )];
3441 		ta[inca*(i+13 )]+=alpha*tb[incb*(i+13 )];
3442 		ta[inca*(i+14 )]+=alpha*tb[incb*(i+14 )];
3443 		ta[inca*(i+15 )]+=alpha*tb[incb*(i+15 )];
3444 	}
3445 for(     ;i<n;++i){ 	ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3446 	 }
3447 }
3448 ;
3449 	}
3450 	else
3451 #endif
3452 	return RSB_ERR_UNSUPPORTED_TYPE	;
3453 	return RSB_ERR_NO_ERROR;
3454 }
3455 
rsb__cblas_Xaxpy(rsb_type_t type,size_t n,const void * alphap,const void * x,const int incx,void * y,const int incy)3456 rsb_err_t rsb__cblas_Xaxpy(rsb_type_t type, size_t n, const void * alphap, const void * x, const int incx, void * y, const int incy)
3457 {
3458 	/*!
3459 	 * y <- y + alpha * x
3460          */
3461 	return rsb_alpha_sum_strided(y,x,alphap,type,n,incy,incx);
3462 }
3463 
rsb__vector_mult(const void * a,const void * b,void * c,rsb_type_t type,size_t n)3464 rsb_err_t rsb__vector_mult(const void * a, const void * b, void * c, rsb_type_t type, size_t n)
3465 {
3466 	/*!
3467 	 * c <- a*b
3468 	 * It is allowed to give c == a or c == b or a == b
3469          *
3470 	 * \param array	an array pointer
3471 	 * \param type	a valid type code
3472 	 * \param n	the input array length
3473 	 *
3474 	 * FIXME : useless ?
3475 	 *
3476 	 * \return \rsberrcodemsg
3477 	 * */
3478 	size_t i;
3479 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3480 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
3481 	{
3482 	const double*ta = a; const double*tb = b; double*tc = c;
3483 	{
3484 for(i=0;i+15<n;i+=16){
3485 	tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3486 		tc[i+1 ] = ta[i+1 ]*tb[i+1 ];
3487 		tc[i+2 ] = ta[i+2 ]*tb[i+2 ];
3488 		tc[i+3 ] = ta[i+3 ]*tb[i+3 ];
3489 		tc[i+4 ] = ta[i+4 ]*tb[i+4 ];
3490 		tc[i+5 ] = ta[i+5 ]*tb[i+5 ];
3491 		tc[i+6 ] = ta[i+6 ]*tb[i+6 ];
3492 		tc[i+7 ] = ta[i+7 ]*tb[i+7 ];
3493 		tc[i+8 ] = ta[i+8 ]*tb[i+8 ];
3494 		tc[i+9 ] = ta[i+9 ]*tb[i+9 ];
3495 		tc[i+10 ] = ta[i+10 ]*tb[i+10 ];
3496 		tc[i+11 ] = ta[i+11 ]*tb[i+11 ];
3497 		tc[i+12 ] = ta[i+12 ]*tb[i+12 ];
3498 		tc[i+13 ] = ta[i+13 ]*tb[i+13 ];
3499 		tc[i+14 ] = ta[i+14 ]*tb[i+14 ];
3500 		tc[i+15 ] = ta[i+15 ]*tb[i+15 ];
3501 	}
3502 for(     ;i<n;++i){ 	tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3503 	 }
3504 }
3505 ;
3506 	}
3507 	else
3508 #endif
3509 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3510 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
3511 	{
3512 	const float*ta = a; const float*tb = b; float*tc = c;
3513 	{
3514 for(i=0;i+15<n;i+=16){
3515 	tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3516 		tc[i+1 ] = ta[i+1 ]*tb[i+1 ];
3517 		tc[i+2 ] = ta[i+2 ]*tb[i+2 ];
3518 		tc[i+3 ] = ta[i+3 ]*tb[i+3 ];
3519 		tc[i+4 ] = ta[i+4 ]*tb[i+4 ];
3520 		tc[i+5 ] = ta[i+5 ]*tb[i+5 ];
3521 		tc[i+6 ] = ta[i+6 ]*tb[i+6 ];
3522 		tc[i+7 ] = ta[i+7 ]*tb[i+7 ];
3523 		tc[i+8 ] = ta[i+8 ]*tb[i+8 ];
3524 		tc[i+9 ] = ta[i+9 ]*tb[i+9 ];
3525 		tc[i+10 ] = ta[i+10 ]*tb[i+10 ];
3526 		tc[i+11 ] = ta[i+11 ]*tb[i+11 ];
3527 		tc[i+12 ] = ta[i+12 ]*tb[i+12 ];
3528 		tc[i+13 ] = ta[i+13 ]*tb[i+13 ];
3529 		tc[i+14 ] = ta[i+14 ]*tb[i+14 ];
3530 		tc[i+15 ] = ta[i+15 ]*tb[i+15 ];
3531 	}
3532 for(     ;i<n;++i){ 	tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3533 	 }
3534 }
3535 ;
3536 	}
3537 	else
3538 #endif
3539 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3540 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
3541 	{
3542 	const float complex*ta = a; const float complex*tb = b; float complex*tc = c;
3543 	{
3544 for(i=0;i+15<n;i+=16){
3545 	tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3546 		tc[i+1 ] = ta[i+1 ]*tb[i+1 ];
3547 		tc[i+2 ] = ta[i+2 ]*tb[i+2 ];
3548 		tc[i+3 ] = ta[i+3 ]*tb[i+3 ];
3549 		tc[i+4 ] = ta[i+4 ]*tb[i+4 ];
3550 		tc[i+5 ] = ta[i+5 ]*tb[i+5 ];
3551 		tc[i+6 ] = ta[i+6 ]*tb[i+6 ];
3552 		tc[i+7 ] = ta[i+7 ]*tb[i+7 ];
3553 		tc[i+8 ] = ta[i+8 ]*tb[i+8 ];
3554 		tc[i+9 ] = ta[i+9 ]*tb[i+9 ];
3555 		tc[i+10 ] = ta[i+10 ]*tb[i+10 ];
3556 		tc[i+11 ] = ta[i+11 ]*tb[i+11 ];
3557 		tc[i+12 ] = ta[i+12 ]*tb[i+12 ];
3558 		tc[i+13 ] = ta[i+13 ]*tb[i+13 ];
3559 		tc[i+14 ] = ta[i+14 ]*tb[i+14 ];
3560 		tc[i+15 ] = ta[i+15 ]*tb[i+15 ];
3561 	}
3562 for(     ;i<n;++i){ 	tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3563 	 }
3564 }
3565 ;
3566 	}
3567 	else
3568 #endif
3569 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3570 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
3571 	{
3572 	const double complex*ta = a; const double complex*tb = b; double complex*tc = c;
3573 	{
3574 for(i=0;i+15<n;i+=16){
3575 	tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3576 		tc[i+1 ] = ta[i+1 ]*tb[i+1 ];
3577 		tc[i+2 ] = ta[i+2 ]*tb[i+2 ];
3578 		tc[i+3 ] = ta[i+3 ]*tb[i+3 ];
3579 		tc[i+4 ] = ta[i+4 ]*tb[i+4 ];
3580 		tc[i+5 ] = ta[i+5 ]*tb[i+5 ];
3581 		tc[i+6 ] = ta[i+6 ]*tb[i+6 ];
3582 		tc[i+7 ] = ta[i+7 ]*tb[i+7 ];
3583 		tc[i+8 ] = ta[i+8 ]*tb[i+8 ];
3584 		tc[i+9 ] = ta[i+9 ]*tb[i+9 ];
3585 		tc[i+10 ] = ta[i+10 ]*tb[i+10 ];
3586 		tc[i+11 ] = ta[i+11 ]*tb[i+11 ];
3587 		tc[i+12 ] = ta[i+12 ]*tb[i+12 ];
3588 		tc[i+13 ] = ta[i+13 ]*tb[i+13 ];
3589 		tc[i+14 ] = ta[i+14 ]*tb[i+14 ];
3590 		tc[i+15 ] = ta[i+15 ]*tb[i+15 ];
3591 	}
3592 for(     ;i<n;++i){ 	tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3593 	 }
3594 }
3595 ;
3596 	}
3597 	else
3598 #endif
3599 	return RSB_ERR_UNSUPPORTED_TYPE	;
3600 	return RSB_ERR_NO_ERROR;
3601 }
3602 
rsb__xcopy(void * a,const void * b,rsb_nnz_idx_t toi,rsb_nnz_idx_t foi,rsb_nnz_idx_t n,size_t el_size)3603 rsb_err_t rsb__xcopy(void * a, const void * b, rsb_nnz_idx_t toi, rsb_nnz_idx_t foi, rsb_nnz_idx_t n,size_t el_size)
3604 {
3605 	/*!
3606 	 * a[toi:toi+n] <- b[foi:foi+n]
3607          *
3608 	 * \param array	an array pointer
3609 	 * \param type	a valid type code
3610 	 *
3611 	 * \return \rsberrcodemsg
3612 	 * */
3613 	rsb__memcpy(((rsb_byte_t*)a)+el_size*toi,((const rsb_byte_t*)b)+el_size*foi,el_size*n);
3614 	return RSB_ERR_NO_ERROR;
3615 }
3616 
rsb__do_are_same(const void * ap,const void * bp,rsb_nnz_idx_t n,rsb_type_t typecode,rsb_nnz_idx_t incx,rsb_nnz_idx_t incy)3617 rsb_err_t rsb__do_are_same(const void * ap, const void * bp, rsb_nnz_idx_t n,rsb_type_t typecode, rsb_nnz_idx_t incx, rsb_nnz_idx_t incy)
3618 {
3619 	/*!
3620          *
3621 	 * \param array	an array pointer
3622 	 * \param type	a valid type code
3623 	 *
3624 	 * \return \rsberrcodemsg
3625 	 *
3626 	 * For cases like 1+0I differing from 1-0I ..
3627 	 * */
3628 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3629 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
3630 	{
3631 	rsb_nnz_idx_t i;
3632 	const double *a = ap; const double *b = bp;
3633 
3634 	{
3635 for(i=0;i+15<n;i+=16){
3636 	if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3637 	if( a[incx*(i+1 )]!=b[incy*(i+1 )]) goto differing;
3638 	if( a[incx*(i+2 )]!=b[incy*(i+2 )]) goto differing;
3639 	if( a[incx*(i+3 )]!=b[incy*(i+3 )]) goto differing;
3640 	if( a[incx*(i+4 )]!=b[incy*(i+4 )]) goto differing;
3641 	if( a[incx*(i+5 )]!=b[incy*(i+5 )]) goto differing;
3642 	if( a[incx*(i+6 )]!=b[incy*(i+6 )]) goto differing;
3643 	if( a[incx*(i+7 )]!=b[incy*(i+7 )]) goto differing;
3644 	if( a[incx*(i+8 )]!=b[incy*(i+8 )]) goto differing;
3645 	if( a[incx*(i+9 )]!=b[incy*(i+9 )]) goto differing;
3646 	if( a[incx*(i+10 )]!=b[incy*(i+10 )]) goto differing;
3647 	if( a[incx*(i+11 )]!=b[incy*(i+11 )]) goto differing;
3648 	if( a[incx*(i+12 )]!=b[incy*(i+12 )]) goto differing;
3649 	if( a[incx*(i+13 )]!=b[incy*(i+13 )]) goto differing;
3650 	if( a[incx*(i+14 )]!=b[incy*(i+14 )]) goto differing;
3651 	if( a[incx*(i+15 )]!=b[incy*(i+15 )]) goto differing;
3652 }
3653 for(     ;i<n;++i){ 	if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3654  }
3655 }
3656 		return RSB_ERR_NO_ERROR;
3657 	}
3658 	else
3659 #endif
3660 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3661 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
3662 	{
3663 	rsb_nnz_idx_t i;
3664 	const float *a = ap; const float *b = bp;
3665 
3666 	{
3667 for(i=0;i+15<n;i+=16){
3668 	if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3669 	if( a[incx*(i+1 )]!=b[incy*(i+1 )]) goto differing;
3670 	if( a[incx*(i+2 )]!=b[incy*(i+2 )]) goto differing;
3671 	if( a[incx*(i+3 )]!=b[incy*(i+3 )]) goto differing;
3672 	if( a[incx*(i+4 )]!=b[incy*(i+4 )]) goto differing;
3673 	if( a[incx*(i+5 )]!=b[incy*(i+5 )]) goto differing;
3674 	if( a[incx*(i+6 )]!=b[incy*(i+6 )]) goto differing;
3675 	if( a[incx*(i+7 )]!=b[incy*(i+7 )]) goto differing;
3676 	if( a[incx*(i+8 )]!=b[incy*(i+8 )]) goto differing;
3677 	if( a[incx*(i+9 )]!=b[incy*(i+9 )]) goto differing;
3678 	if( a[incx*(i+10 )]!=b[incy*(i+10 )]) goto differing;
3679 	if( a[incx*(i+11 )]!=b[incy*(i+11 )]) goto differing;
3680 	if( a[incx*(i+12 )]!=b[incy*(i+12 )]) goto differing;
3681 	if( a[incx*(i+13 )]!=b[incy*(i+13 )]) goto differing;
3682 	if( a[incx*(i+14 )]!=b[incy*(i+14 )]) goto differing;
3683 	if( a[incx*(i+15 )]!=b[incy*(i+15 )]) goto differing;
3684 }
3685 for(     ;i<n;++i){ 	if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3686  }
3687 }
3688 		return RSB_ERR_NO_ERROR;
3689 	}
3690 	else
3691 #endif
3692 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3693 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
3694 	{
3695 	rsb_nnz_idx_t i;
3696 	const float complex *a = ap; const float complex *b = bp;
3697 
3698 	{
3699 for(i=0;i+15<n;i+=16){
3700 	if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3701 	if( a[incx*(i+1 )]!=b[incy*(i+1 )]) goto differing;
3702 	if( a[incx*(i+2 )]!=b[incy*(i+2 )]) goto differing;
3703 	if( a[incx*(i+3 )]!=b[incy*(i+3 )]) goto differing;
3704 	if( a[incx*(i+4 )]!=b[incy*(i+4 )]) goto differing;
3705 	if( a[incx*(i+5 )]!=b[incy*(i+5 )]) goto differing;
3706 	if( a[incx*(i+6 )]!=b[incy*(i+6 )]) goto differing;
3707 	if( a[incx*(i+7 )]!=b[incy*(i+7 )]) goto differing;
3708 	if( a[incx*(i+8 )]!=b[incy*(i+8 )]) goto differing;
3709 	if( a[incx*(i+9 )]!=b[incy*(i+9 )]) goto differing;
3710 	if( a[incx*(i+10 )]!=b[incy*(i+10 )]) goto differing;
3711 	if( a[incx*(i+11 )]!=b[incy*(i+11 )]) goto differing;
3712 	if( a[incx*(i+12 )]!=b[incy*(i+12 )]) goto differing;
3713 	if( a[incx*(i+13 )]!=b[incy*(i+13 )]) goto differing;
3714 	if( a[incx*(i+14 )]!=b[incy*(i+14 )]) goto differing;
3715 	if( a[incx*(i+15 )]!=b[incy*(i+15 )]) goto differing;
3716 }
3717 for(     ;i<n;++i){ 	if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3718  }
3719 }
3720 		return RSB_ERR_NO_ERROR;
3721 	}
3722 	else
3723 #endif
3724 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3725 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
3726 	{
3727 	rsb_nnz_idx_t i;
3728 	const double complex *a = ap; const double complex *b = bp;
3729 
3730 	{
3731 for(i=0;i+15<n;i+=16){
3732 	if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3733 	if( a[incx*(i+1 )]!=b[incy*(i+1 )]) goto differing;
3734 	if( a[incx*(i+2 )]!=b[incy*(i+2 )]) goto differing;
3735 	if( a[incx*(i+3 )]!=b[incy*(i+3 )]) goto differing;
3736 	if( a[incx*(i+4 )]!=b[incy*(i+4 )]) goto differing;
3737 	if( a[incx*(i+5 )]!=b[incy*(i+5 )]) goto differing;
3738 	if( a[incx*(i+6 )]!=b[incy*(i+6 )]) goto differing;
3739 	if( a[incx*(i+7 )]!=b[incy*(i+7 )]) goto differing;
3740 	if( a[incx*(i+8 )]!=b[incy*(i+8 )]) goto differing;
3741 	if( a[incx*(i+9 )]!=b[incy*(i+9 )]) goto differing;
3742 	if( a[incx*(i+10 )]!=b[incy*(i+10 )]) goto differing;
3743 	if( a[incx*(i+11 )]!=b[incy*(i+11 )]) goto differing;
3744 	if( a[incx*(i+12 )]!=b[incy*(i+12 )]) goto differing;
3745 	if( a[incx*(i+13 )]!=b[incy*(i+13 )]) goto differing;
3746 	if( a[incx*(i+14 )]!=b[incy*(i+14 )]) goto differing;
3747 	if( a[incx*(i+15 )]!=b[incy*(i+15 )]) goto differing;
3748 }
3749 for(     ;i<n;++i){ 	if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3750  }
3751 }
3752 		return RSB_ERR_NO_ERROR;
3753 	}
3754 	else
3755 #endif
3756 	return RSB_ERR_UNSUPPORTED_TYPE;
3757 differing:
3758 	return RSB_ERR_GENERIC_ERROR;
3759 }
3760 
rsb__xcopy_strided_typed(void * a,const void * b,rsb_nnz_idx_t toi,rsb_nnz_idx_t foi,rsb_nnz_idx_t n,rsb_type_t typecode,rsb_nnz_idx_t incx,rsb_nnz_idx_t incy)3761 static rsb_err_t rsb__xcopy_strided_typed(void * a, const void * b, rsb_nnz_idx_t toi, rsb_nnz_idx_t foi, rsb_nnz_idx_t n,rsb_type_t typecode, rsb_nnz_idx_t incx, rsb_nnz_idx_t incy)
3762 {
3763 	/*!
3764 	 * a[toi:toi+n] <- b[foi:foi+n]
3765          *
3766 	 * \param array	an array pointer
3767 	 * \param type	a valid type code
3768 	 *
3769 	 * \return \rsberrcodemsg
3770 	 * */
3771 	if(incx==1 && incy==1)
3772 		return rsb__xcopy(a,b,toi,foi,n,RSB_SIZEOF(typecode));
3773 	/* else */
3774 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3775 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
3776 	{
3777 	rsb_nnz_idx_t i;
3778 	double *ap = a; const double *bp = b;
3779 	ap+=toi;
3780 	bp+=foi;
3781 	{
3782 for(i=0;i+15<n;i+=16){
3783 	ap[(i+0 )*incx] = bp[(i+0 )*incy];
3784 		ap[(i+1 )*incx] = bp[(i+1 )*incy];
3785 		ap[(i+2 )*incx] = bp[(i+2 )*incy];
3786 		ap[(i+3 )*incx] = bp[(i+3 )*incy];
3787 		ap[(i+4 )*incx] = bp[(i+4 )*incy];
3788 		ap[(i+5 )*incx] = bp[(i+5 )*incy];
3789 		ap[(i+6 )*incx] = bp[(i+6 )*incy];
3790 		ap[(i+7 )*incx] = bp[(i+7 )*incy];
3791 		ap[(i+8 )*incx] = bp[(i+8 )*incy];
3792 		ap[(i+9 )*incx] = bp[(i+9 )*incy];
3793 		ap[(i+10 )*incx] = bp[(i+10 )*incy];
3794 		ap[(i+11 )*incx] = bp[(i+11 )*incy];
3795 		ap[(i+12 )*incx] = bp[(i+12 )*incy];
3796 		ap[(i+13 )*incx] = bp[(i+13 )*incy];
3797 		ap[(i+14 )*incx] = bp[(i+14 )*incy];
3798 		ap[(i+15 )*incx] = bp[(i+15 )*incy];
3799 	}
3800 for(     ;i<n;++i){ 	ap[(i+0 )*incx] = bp[(i+0 )*incy];
3801 	 }
3802 }
3803 ;
3804 		return RSB_ERR_NO_ERROR;
3805 	}
3806 	else
3807 #endif
3808 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3809 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
3810 	{
3811 	rsb_nnz_idx_t i;
3812 	float *ap = a; const float *bp = b;
3813 	ap+=toi;
3814 	bp+=foi;
3815 	{
3816 for(i=0;i+15<n;i+=16){
3817 	ap[(i+0 )*incx] = bp[(i+0 )*incy];
3818 		ap[(i+1 )*incx] = bp[(i+1 )*incy];
3819 		ap[(i+2 )*incx] = bp[(i+2 )*incy];
3820 		ap[(i+3 )*incx] = bp[(i+3 )*incy];
3821 		ap[(i+4 )*incx] = bp[(i+4 )*incy];
3822 		ap[(i+5 )*incx] = bp[(i+5 )*incy];
3823 		ap[(i+6 )*incx] = bp[(i+6 )*incy];
3824 		ap[(i+7 )*incx] = bp[(i+7 )*incy];
3825 		ap[(i+8 )*incx] = bp[(i+8 )*incy];
3826 		ap[(i+9 )*incx] = bp[(i+9 )*incy];
3827 		ap[(i+10 )*incx] = bp[(i+10 )*incy];
3828 		ap[(i+11 )*incx] = bp[(i+11 )*incy];
3829 		ap[(i+12 )*incx] = bp[(i+12 )*incy];
3830 		ap[(i+13 )*incx] = bp[(i+13 )*incy];
3831 		ap[(i+14 )*incx] = bp[(i+14 )*incy];
3832 		ap[(i+15 )*incx] = bp[(i+15 )*incy];
3833 	}
3834 for(     ;i<n;++i){ 	ap[(i+0 )*incx] = bp[(i+0 )*incy];
3835 	 }
3836 }
3837 ;
3838 		return RSB_ERR_NO_ERROR;
3839 	}
3840 	else
3841 #endif
3842 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3843 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
3844 	{
3845 	rsb_nnz_idx_t i;
3846 	float complex *ap = a; const float complex *bp = b;
3847 	ap+=toi;
3848 	bp+=foi;
3849 	{
3850 for(i=0;i+15<n;i+=16){
3851 	ap[(i+0 )*incx] = bp[(i+0 )*incy];
3852 		ap[(i+1 )*incx] = bp[(i+1 )*incy];
3853 		ap[(i+2 )*incx] = bp[(i+2 )*incy];
3854 		ap[(i+3 )*incx] = bp[(i+3 )*incy];
3855 		ap[(i+4 )*incx] = bp[(i+4 )*incy];
3856 		ap[(i+5 )*incx] = bp[(i+5 )*incy];
3857 		ap[(i+6 )*incx] = bp[(i+6 )*incy];
3858 		ap[(i+7 )*incx] = bp[(i+7 )*incy];
3859 		ap[(i+8 )*incx] = bp[(i+8 )*incy];
3860 		ap[(i+9 )*incx] = bp[(i+9 )*incy];
3861 		ap[(i+10 )*incx] = bp[(i+10 )*incy];
3862 		ap[(i+11 )*incx] = bp[(i+11 )*incy];
3863 		ap[(i+12 )*incx] = bp[(i+12 )*incy];
3864 		ap[(i+13 )*incx] = bp[(i+13 )*incy];
3865 		ap[(i+14 )*incx] = bp[(i+14 )*incy];
3866 		ap[(i+15 )*incx] = bp[(i+15 )*incy];
3867 	}
3868 for(     ;i<n;++i){ 	ap[(i+0 )*incx] = bp[(i+0 )*incy];
3869 	 }
3870 }
3871 ;
3872 		return RSB_ERR_NO_ERROR;
3873 	}
3874 	else
3875 #endif
3876 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3877 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
3878 	{
3879 	rsb_nnz_idx_t i;
3880 	double complex *ap = a; const double complex *bp = b;
3881 	ap+=toi;
3882 	bp+=foi;
3883 	{
3884 for(i=0;i+15<n;i+=16){
3885 	ap[(i+0 )*incx] = bp[(i+0 )*incy];
3886 		ap[(i+1 )*incx] = bp[(i+1 )*incy];
3887 		ap[(i+2 )*incx] = bp[(i+2 )*incy];
3888 		ap[(i+3 )*incx] = bp[(i+3 )*incy];
3889 		ap[(i+4 )*incx] = bp[(i+4 )*incy];
3890 		ap[(i+5 )*incx] = bp[(i+5 )*incy];
3891 		ap[(i+6 )*incx] = bp[(i+6 )*incy];
3892 		ap[(i+7 )*incx] = bp[(i+7 )*incy];
3893 		ap[(i+8 )*incx] = bp[(i+8 )*incy];
3894 		ap[(i+9 )*incx] = bp[(i+9 )*incy];
3895 		ap[(i+10 )*incx] = bp[(i+10 )*incy];
3896 		ap[(i+11 )*incx] = bp[(i+11 )*incy];
3897 		ap[(i+12 )*incx] = bp[(i+12 )*incy];
3898 		ap[(i+13 )*incx] = bp[(i+13 )*incy];
3899 		ap[(i+14 )*incx] = bp[(i+14 )*incy];
3900 		ap[(i+15 )*incx] = bp[(i+15 )*incy];
3901 	}
3902 for(     ;i<n;++i){ 	ap[(i+0 )*incx] = bp[(i+0 )*incy];
3903 	 }
3904 }
3905 ;
3906 		return RSB_ERR_NO_ERROR;
3907 	}
3908 	else
3909 #endif
3910 	return RSB_ERR_NO_ERROR;
3911 }
3912 
rsb__sqrt_of_sum_of_fabs_diffs(const void * a,const void * b,void * err,rsb_type_t type,size_t n)3913 rsb_err_t rsb__sqrt_of_sum_of_fabs_diffs(const void * a, const void * b, void *err, rsb_type_t type, size_t n)
3914 {
3915 	/*!
3916 	 * Will compute the square root of the sum of the squares of the vectors elements differences.
3917 	 * \param array	an array pointer
3918 	 * \param type	a valid type code
3919 	 * \param n	the input array length
3920 	 *
3921 	 * FIXME
3922 	 *
3923 	 * \return \rsberrcodemsg
3924 	 * */
3925 	size_t i;
3926 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3927 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
3928 	{
3929 	const double*ta = a; const double*tb = b;
3930 	*((double*)err) = ((double)(0));
3931 	{
3932 for(i=0;i+15<n;i+=16){
3933 	*((double*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3934 		*((double*)(err))+=(ta[i+1 ]-tb[i+1 ])*(ta[i+1 ]-tb[i+1 ]);
3935 		*((double*)(err))+=(ta[i+2 ]-tb[i+2 ])*(ta[i+2 ]-tb[i+2 ]);
3936 		*((double*)(err))+=(ta[i+3 ]-tb[i+3 ])*(ta[i+3 ]-tb[i+3 ]);
3937 		*((double*)(err))+=(ta[i+4 ]-tb[i+4 ])*(ta[i+4 ]-tb[i+4 ]);
3938 		*((double*)(err))+=(ta[i+5 ]-tb[i+5 ])*(ta[i+5 ]-tb[i+5 ]);
3939 		*((double*)(err))+=(ta[i+6 ]-tb[i+6 ])*(ta[i+6 ]-tb[i+6 ]);
3940 		*((double*)(err))+=(ta[i+7 ]-tb[i+7 ])*(ta[i+7 ]-tb[i+7 ]);
3941 		*((double*)(err))+=(ta[i+8 ]-tb[i+8 ])*(ta[i+8 ]-tb[i+8 ]);
3942 		*((double*)(err))+=(ta[i+9 ]-tb[i+9 ])*(ta[i+9 ]-tb[i+9 ]);
3943 		*((double*)(err))+=(ta[i+10 ]-tb[i+10 ])*(ta[i+10 ]-tb[i+10 ]);
3944 		*((double*)(err))+=(ta[i+11 ]-tb[i+11 ])*(ta[i+11 ]-tb[i+11 ]);
3945 		*((double*)(err))+=(ta[i+12 ]-tb[i+12 ])*(ta[i+12 ]-tb[i+12 ]);
3946 		*((double*)(err))+=(ta[i+13 ]-tb[i+13 ])*(ta[i+13 ]-tb[i+13 ]);
3947 		*((double*)(err))+=(ta[i+14 ]-tb[i+14 ])*(ta[i+14 ]-tb[i+14 ]);
3948 		*((double*)(err))+=(ta[i+15 ]-tb[i+15 ])*(ta[i+15 ]-tb[i+15 ]);
3949 	}
3950 for(     ;i<n;++i){ 	*((double*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3951 	 }
3952 }
3953 ;
3954 	*((double*)err) = sqrt((*((double*)err)));
3955 	}
3956 	else
3957 #endif
3958 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3959 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
3960 	{
3961 	const float*ta = a; const float*tb = b;
3962 	*((float*)err) = ((float)(0));
3963 	{
3964 for(i=0;i+15<n;i+=16){
3965 	*((float*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3966 		*((float*)(err))+=(ta[i+1 ]-tb[i+1 ])*(ta[i+1 ]-tb[i+1 ]);
3967 		*((float*)(err))+=(ta[i+2 ]-tb[i+2 ])*(ta[i+2 ]-tb[i+2 ]);
3968 		*((float*)(err))+=(ta[i+3 ]-tb[i+3 ])*(ta[i+3 ]-tb[i+3 ]);
3969 		*((float*)(err))+=(ta[i+4 ]-tb[i+4 ])*(ta[i+4 ]-tb[i+4 ]);
3970 		*((float*)(err))+=(ta[i+5 ]-tb[i+5 ])*(ta[i+5 ]-tb[i+5 ]);
3971 		*((float*)(err))+=(ta[i+6 ]-tb[i+6 ])*(ta[i+6 ]-tb[i+6 ]);
3972 		*((float*)(err))+=(ta[i+7 ]-tb[i+7 ])*(ta[i+7 ]-tb[i+7 ]);
3973 		*((float*)(err))+=(ta[i+8 ]-tb[i+8 ])*(ta[i+8 ]-tb[i+8 ]);
3974 		*((float*)(err))+=(ta[i+9 ]-tb[i+9 ])*(ta[i+9 ]-tb[i+9 ]);
3975 		*((float*)(err))+=(ta[i+10 ]-tb[i+10 ])*(ta[i+10 ]-tb[i+10 ]);
3976 		*((float*)(err))+=(ta[i+11 ]-tb[i+11 ])*(ta[i+11 ]-tb[i+11 ]);
3977 		*((float*)(err))+=(ta[i+12 ]-tb[i+12 ])*(ta[i+12 ]-tb[i+12 ]);
3978 		*((float*)(err))+=(ta[i+13 ]-tb[i+13 ])*(ta[i+13 ]-tb[i+13 ]);
3979 		*((float*)(err))+=(ta[i+14 ]-tb[i+14 ])*(ta[i+14 ]-tb[i+14 ]);
3980 		*((float*)(err))+=(ta[i+15 ]-tb[i+15 ])*(ta[i+15 ]-tb[i+15 ]);
3981 	}
3982 for(     ;i<n;++i){ 	*((float*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3983 	 }
3984 }
3985 ;
3986 	*((float*)err) = sqrtf((*((float*)err)));
3987 	}
3988 	else
3989 #endif
3990 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3991 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
3992 	{
3993 	const float complex*ta = a; const float complex*tb = b;
3994 	*((float complex*)err) = ((float complex)(0));
3995 	{
3996 for(i=0;i+15<n;i+=16){
3997 	*((float complex*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3998 		*((float complex*)(err))+=(ta[i+1 ]-tb[i+1 ])*(ta[i+1 ]-tb[i+1 ]);
3999 		*((float complex*)(err))+=(ta[i+2 ]-tb[i+2 ])*(ta[i+2 ]-tb[i+2 ]);
4000 		*((float complex*)(err))+=(ta[i+3 ]-tb[i+3 ])*(ta[i+3 ]-tb[i+3 ]);
4001 		*((float complex*)(err))+=(ta[i+4 ]-tb[i+4 ])*(ta[i+4 ]-tb[i+4 ]);
4002 		*((float complex*)(err))+=(ta[i+5 ]-tb[i+5 ])*(ta[i+5 ]-tb[i+5 ]);
4003 		*((float complex*)(err))+=(ta[i+6 ]-tb[i+6 ])*(ta[i+6 ]-tb[i+6 ]);
4004 		*((float complex*)(err))+=(ta[i+7 ]-tb[i+7 ])*(ta[i+7 ]-tb[i+7 ]);
4005 		*((float complex*)(err))+=(ta[i+8 ]-tb[i+8 ])*(ta[i+8 ]-tb[i+8 ]);
4006 		*((float complex*)(err))+=(ta[i+9 ]-tb[i+9 ])*(ta[i+9 ]-tb[i+9 ]);
4007 		*((float complex*)(err))+=(ta[i+10 ]-tb[i+10 ])*(ta[i+10 ]-tb[i+10 ]);
4008 		*((float complex*)(err))+=(ta[i+11 ]-tb[i+11 ])*(ta[i+11 ]-tb[i+11 ]);
4009 		*((float complex*)(err))+=(ta[i+12 ]-tb[i+12 ])*(ta[i+12 ]-tb[i+12 ]);
4010 		*((float complex*)(err))+=(ta[i+13 ]-tb[i+13 ])*(ta[i+13 ]-tb[i+13 ]);
4011 		*((float complex*)(err))+=(ta[i+14 ]-tb[i+14 ])*(ta[i+14 ]-tb[i+14 ]);
4012 		*((float complex*)(err))+=(ta[i+15 ]-tb[i+15 ])*(ta[i+15 ]-tb[i+15 ]);
4013 	}
4014 for(     ;i<n;++i){ 	*((float complex*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
4015 	 }
4016 }
4017 ;
4018 	*((float complex*)err) = csqrtf((*((float complex*)err)));
4019 	}
4020 	else
4021 #endif
4022 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4023 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4024 	{
4025 	const double complex*ta = a; const double complex*tb = b;
4026 	*((double complex*)err) = ((double complex)(0));
4027 	{
4028 for(i=0;i+15<n;i+=16){
4029 	*((double complex*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
4030 		*((double complex*)(err))+=(ta[i+1 ]-tb[i+1 ])*(ta[i+1 ]-tb[i+1 ]);
4031 		*((double complex*)(err))+=(ta[i+2 ]-tb[i+2 ])*(ta[i+2 ]-tb[i+2 ]);
4032 		*((double complex*)(err))+=(ta[i+3 ]-tb[i+3 ])*(ta[i+3 ]-tb[i+3 ]);
4033 		*((double complex*)(err))+=(ta[i+4 ]-tb[i+4 ])*(ta[i+4 ]-tb[i+4 ]);
4034 		*((double complex*)(err))+=(ta[i+5 ]-tb[i+5 ])*(ta[i+5 ]-tb[i+5 ]);
4035 		*((double complex*)(err))+=(ta[i+6 ]-tb[i+6 ])*(ta[i+6 ]-tb[i+6 ]);
4036 		*((double complex*)(err))+=(ta[i+7 ]-tb[i+7 ])*(ta[i+7 ]-tb[i+7 ]);
4037 		*((double complex*)(err))+=(ta[i+8 ]-tb[i+8 ])*(ta[i+8 ]-tb[i+8 ]);
4038 		*((double complex*)(err))+=(ta[i+9 ]-tb[i+9 ])*(ta[i+9 ]-tb[i+9 ]);
4039 		*((double complex*)(err))+=(ta[i+10 ]-tb[i+10 ])*(ta[i+10 ]-tb[i+10 ]);
4040 		*((double complex*)(err))+=(ta[i+11 ]-tb[i+11 ])*(ta[i+11 ]-tb[i+11 ]);
4041 		*((double complex*)(err))+=(ta[i+12 ]-tb[i+12 ])*(ta[i+12 ]-tb[i+12 ]);
4042 		*((double complex*)(err))+=(ta[i+13 ]-tb[i+13 ])*(ta[i+13 ]-tb[i+13 ]);
4043 		*((double complex*)(err))+=(ta[i+14 ]-tb[i+14 ])*(ta[i+14 ]-tb[i+14 ]);
4044 		*((double complex*)(err))+=(ta[i+15 ]-tb[i+15 ])*(ta[i+15 ]-tb[i+15 ]);
4045 	}
4046 for(     ;i<n;++i){ 	*((double complex*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
4047 	 }
4048 }
4049 ;
4050 	*((double complex*)err) = csqrt((*((double complex*)err)));
4051 	}
4052 	else
4053 #endif
4054 		return RSB_ERR_UNSUPPORTED_TYPE	;
4055 	return RSB_ERR_NO_ERROR;
4056 }
4057 
rsb__fill_with_increasing_values(void * array,rsb_type_t type,size_t n)4058 rsb_err_t rsb__fill_with_increasing_values(void * array, rsb_type_t type, size_t n)
4059 {
4060 	/*!
4061 	 * \ingroup gr_vec
4062 	 * FIXME : document me
4063 	 * starts with one.
4064 	 * \return \rsberrcodemsg
4065 	 * */
4066 	size_t i;
4067 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4068 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4069 	{
4070 	double*ta = array;
4071 {
4072 for(i=0;i+15<n;i+=16){
4073 ta[i+0 ] = (const double)(i+0 +1);ta[i+1 ] = (const double)(i+1 +1);ta[i+2 ] = (const double)(i+2 +1);ta[i+3 ] = (const double)(i+3 +1);ta[i+4 ] = (const double)(i+4 +1);ta[i+5 ] = (const double)(i+5 +1);ta[i+6 ] = (const double)(i+6 +1);ta[i+7 ] = (const double)(i+7 +1);ta[i+8 ] = (const double)(i+8 +1);ta[i+9 ] = (const double)(i+9 +1);ta[i+10 ] = (const double)(i+10 +1);ta[i+11 ] = (const double)(i+11 +1);ta[i+12 ] = (const double)(i+12 +1);ta[i+13 ] = (const double)(i+13 +1);ta[i+14 ] = (const double)(i+14 +1);ta[i+15 ] = (const double)(i+15 +1);}
4074 for(     ;i<n;++i){ ta[i+0 ] = (const double)(i+0 +1); }
4075 }
4076 
4077 	}
4078 	else
4079 #endif
4080 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4081 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4082 	{
4083 	float*ta = array;
4084 {
4085 for(i=0;i+15<n;i+=16){
4086 ta[i+0 ] = (const float)(i+0 +1);ta[i+1 ] = (const float)(i+1 +1);ta[i+2 ] = (const float)(i+2 +1);ta[i+3 ] = (const float)(i+3 +1);ta[i+4 ] = (const float)(i+4 +1);ta[i+5 ] = (const float)(i+5 +1);ta[i+6 ] = (const float)(i+6 +1);ta[i+7 ] = (const float)(i+7 +1);ta[i+8 ] = (const float)(i+8 +1);ta[i+9 ] = (const float)(i+9 +1);ta[i+10 ] = (const float)(i+10 +1);ta[i+11 ] = (const float)(i+11 +1);ta[i+12 ] = (const float)(i+12 +1);ta[i+13 ] = (const float)(i+13 +1);ta[i+14 ] = (const float)(i+14 +1);ta[i+15 ] = (const float)(i+15 +1);}
4087 for(     ;i<n;++i){ ta[i+0 ] = (const float)(i+0 +1); }
4088 }
4089 
4090 	}
4091 	else
4092 #endif
4093 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4094 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4095 	{
4096 	float complex*ta = array;
4097 {
4098 for(i=0;i+15<n;i+=16){
4099 ta[i+0 ] = (const float complex)(i+0 +1);ta[i+1 ] = (const float complex)(i+1 +1);ta[i+2 ] = (const float complex)(i+2 +1);ta[i+3 ] = (const float complex)(i+3 +1);ta[i+4 ] = (const float complex)(i+4 +1);ta[i+5 ] = (const float complex)(i+5 +1);ta[i+6 ] = (const float complex)(i+6 +1);ta[i+7 ] = (const float complex)(i+7 +1);ta[i+8 ] = (const float complex)(i+8 +1);ta[i+9 ] = (const float complex)(i+9 +1);ta[i+10 ] = (const float complex)(i+10 +1);ta[i+11 ] = (const float complex)(i+11 +1);ta[i+12 ] = (const float complex)(i+12 +1);ta[i+13 ] = (const float complex)(i+13 +1);ta[i+14 ] = (const float complex)(i+14 +1);ta[i+15 ] = (const float complex)(i+15 +1);}
4100 for(     ;i<n;++i){ ta[i+0 ] = (const float complex)(i+0 +1); }
4101 }
4102 
4103 	}
4104 	else
4105 #endif
4106 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4107 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4108 	{
4109 	double complex*ta = array;
4110 {
4111 for(i=0;i+15<n;i+=16){
4112 ta[i+0 ] = (const double complex)(i+0 +1);ta[i+1 ] = (const double complex)(i+1 +1);ta[i+2 ] = (const double complex)(i+2 +1);ta[i+3 ] = (const double complex)(i+3 +1);ta[i+4 ] = (const double complex)(i+4 +1);ta[i+5 ] = (const double complex)(i+5 +1);ta[i+6 ] = (const double complex)(i+6 +1);ta[i+7 ] = (const double complex)(i+7 +1);ta[i+8 ] = (const double complex)(i+8 +1);ta[i+9 ] = (const double complex)(i+9 +1);ta[i+10 ] = (const double complex)(i+10 +1);ta[i+11 ] = (const double complex)(i+11 +1);ta[i+12 ] = (const double complex)(i+12 +1);ta[i+13 ] = (const double complex)(i+13 +1);ta[i+14 ] = (const double complex)(i+14 +1);ta[i+15 ] = (const double complex)(i+15 +1);}
4113 for(     ;i<n;++i){ ta[i+0 ] = (const double complex)(i+0 +1); }
4114 }
4115 
4116 	}
4117 	else
4118 #endif
4119 		return RSB_ERR_UNSUPPORTED_TYPE	;
4120 	return RSB_ERR_NO_ERROR;
4121 }
4122 
rsb__util_do_conjugate(void * array,rsb_type_t type,size_t n)4123 rsb_err_t rsb__util_do_conjugate(void * array, rsb_type_t type, size_t n)
4124 {
4125 	/*!
4126 	 * \ingroup gr_vec
4127 	 * \return \rsberrcodemsg
4128 	 * */
4129 	size_t i;
4130 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4131 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4132 		return RSB_ERR_NO_ERROR;
4133 	else
4134 #endif
4135 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4136 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4137 		return RSB_ERR_NO_ERROR;
4138 	else
4139 #endif
4140 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4141 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4142 	{
4143 		float complex*ta = array;
4144 {
4145 for(i=0;i+15<n;i+=16){
4146 ta[i+0 ] = conjf(ta[i+0 ]);ta[i+1 ] = conjf(ta[i+1 ]);ta[i+2 ] = conjf(ta[i+2 ]);ta[i+3 ] = conjf(ta[i+3 ]);ta[i+4 ] = conjf(ta[i+4 ]);ta[i+5 ] = conjf(ta[i+5 ]);ta[i+6 ] = conjf(ta[i+6 ]);ta[i+7 ] = conjf(ta[i+7 ]);ta[i+8 ] = conjf(ta[i+8 ]);ta[i+9 ] = conjf(ta[i+9 ]);ta[i+10 ] = conjf(ta[i+10 ]);ta[i+11 ] = conjf(ta[i+11 ]);ta[i+12 ] = conjf(ta[i+12 ]);ta[i+13 ] = conjf(ta[i+13 ]);ta[i+14 ] = conjf(ta[i+14 ]);ta[i+15 ] = conjf(ta[i+15 ]);}
4147 for(     ;i<n;++i){ ta[i+0 ] = conjf(ta[i+0 ]); }
4148 }
4149 
4150 	}
4151 	else
4152 #endif
4153 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4154 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4155 	{
4156 		double complex*ta = array;
4157 {
4158 for(i=0;i+15<n;i+=16){
4159 ta[i+0 ] = conj(ta[i+0 ]);ta[i+1 ] = conj(ta[i+1 ]);ta[i+2 ] = conj(ta[i+2 ]);ta[i+3 ] = conj(ta[i+3 ]);ta[i+4 ] = conj(ta[i+4 ]);ta[i+5 ] = conj(ta[i+5 ]);ta[i+6 ] = conj(ta[i+6 ]);ta[i+7 ] = conj(ta[i+7 ]);ta[i+8 ] = conj(ta[i+8 ]);ta[i+9 ] = conj(ta[i+9 ]);ta[i+10 ] = conj(ta[i+10 ]);ta[i+11 ] = conj(ta[i+11 ]);ta[i+12 ] = conj(ta[i+12 ]);ta[i+13 ] = conj(ta[i+13 ]);ta[i+14 ] = conj(ta[i+14 ]);ta[i+15 ] = conj(ta[i+15 ]);}
4160 for(     ;i<n;++i){ ta[i+0 ] = conj(ta[i+0 ]); }
4161 }
4162 
4163 	}
4164 	else
4165 #endif
4166 		return RSB_ERR_UNSUPPORTED_TYPE	;
4167 	return RSB_ERR_NO_ERROR;
4168 }
4169 
rsb__util_do_negate(void * array,rsb_type_t type,size_t n)4170 rsb_err_t rsb__util_do_negate(void * array, rsb_type_t type, size_t n)
4171 {
4172 	/*!
4173 	 * \ingroup gr_vec
4174 	 * Will negate the input n elements long array of type type.
4175 	 * \param array	an array pointer
4176 	 * \param type	a valid type code
4177 	 * \param n	the input array length
4178 	 *
4179 	 * \return \rsberrcodemsg
4180 	 * */
4181 	size_t i;
4182 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4183 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4184 {
4185 	double*ta = array;
4186 {
4187 for(i=0;i+15<n;i+=16){
4188 ta[i+0 ] = -ta[i+0 ];ta[i+1 ] = -ta[i+1 ];ta[i+2 ] = -ta[i+2 ];ta[i+3 ] = -ta[i+3 ];ta[i+4 ] = -ta[i+4 ];ta[i+5 ] = -ta[i+5 ];ta[i+6 ] = -ta[i+6 ];ta[i+7 ] = -ta[i+7 ];ta[i+8 ] = -ta[i+8 ];ta[i+9 ] = -ta[i+9 ];ta[i+10 ] = -ta[i+10 ];ta[i+11 ] = -ta[i+11 ];ta[i+12 ] = -ta[i+12 ];ta[i+13 ] = -ta[i+13 ];ta[i+14 ] = -ta[i+14 ];ta[i+15 ] = -ta[i+15 ];}
4189 for(     ;i<n;++i){ ta[i+0 ] = -ta[i+0 ]; }
4190 }
4191 }
4192 	else
4193 #endif
4194 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4195 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4196 {
4197 	float*ta = array;
4198 {
4199 for(i=0;i+15<n;i+=16){
4200 ta[i+0 ] = -ta[i+0 ];ta[i+1 ] = -ta[i+1 ];ta[i+2 ] = -ta[i+2 ];ta[i+3 ] = -ta[i+3 ];ta[i+4 ] = -ta[i+4 ];ta[i+5 ] = -ta[i+5 ];ta[i+6 ] = -ta[i+6 ];ta[i+7 ] = -ta[i+7 ];ta[i+8 ] = -ta[i+8 ];ta[i+9 ] = -ta[i+9 ];ta[i+10 ] = -ta[i+10 ];ta[i+11 ] = -ta[i+11 ];ta[i+12 ] = -ta[i+12 ];ta[i+13 ] = -ta[i+13 ];ta[i+14 ] = -ta[i+14 ];ta[i+15 ] = -ta[i+15 ];}
4201 for(     ;i<n;++i){ ta[i+0 ] = -ta[i+0 ]; }
4202 }
4203 }
4204 	else
4205 #endif
4206 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4207 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4208 {
4209 	float complex*ta = array;
4210 {
4211 for(i=0;i+15<n;i+=16){
4212 ta[i+0 ] = -ta[i+0 ];ta[i+1 ] = -ta[i+1 ];ta[i+2 ] = -ta[i+2 ];ta[i+3 ] = -ta[i+3 ];ta[i+4 ] = -ta[i+4 ];ta[i+5 ] = -ta[i+5 ];ta[i+6 ] = -ta[i+6 ];ta[i+7 ] = -ta[i+7 ];ta[i+8 ] = -ta[i+8 ];ta[i+9 ] = -ta[i+9 ];ta[i+10 ] = -ta[i+10 ];ta[i+11 ] = -ta[i+11 ];ta[i+12 ] = -ta[i+12 ];ta[i+13 ] = -ta[i+13 ];ta[i+14 ] = -ta[i+14 ];ta[i+15 ] = -ta[i+15 ];}
4213 for(     ;i<n;++i){ ta[i+0 ] = -ta[i+0 ]; }
4214 }
4215 }
4216 	else
4217 #endif
4218 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4219 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4220 {
4221 	double complex*ta = array;
4222 {
4223 for(i=0;i+15<n;i+=16){
4224 ta[i+0 ] = -ta[i+0 ];ta[i+1 ] = -ta[i+1 ];ta[i+2 ] = -ta[i+2 ];ta[i+3 ] = -ta[i+3 ];ta[i+4 ] = -ta[i+4 ];ta[i+5 ] = -ta[i+5 ];ta[i+6 ] = -ta[i+6 ];ta[i+7 ] = -ta[i+7 ];ta[i+8 ] = -ta[i+8 ];ta[i+9 ] = -ta[i+9 ];ta[i+10 ] = -ta[i+10 ];ta[i+11 ] = -ta[i+11 ];ta[i+12 ] = -ta[i+12 ];ta[i+13 ] = -ta[i+13 ];ta[i+14 ] = -ta[i+14 ];ta[i+15 ] = -ta[i+15 ];}
4225 for(     ;i<n;++i){ ta[i+0 ] = -ta[i+0 ]; }
4226 }
4227 }
4228 	else
4229 #endif
4230 		return RSB_ERR_UNSUPPORTED_TYPE	;
4231 	return RSB_ERR_NO_ERROR;
4232 }
4233 
rsb__util_find_min(void * minp,const void * array,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)4234 rsb_err_t rsb__util_find_min(void * minp, const void * array, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
4235 {
4236 	/*!
4237 	 * \ingroup gr_vec
4238 	 *
4239 	 * \return \rsberrcodemsg
4240 	 * */
4241 	size_t i;
4242 	if(n<1)return RSB_ERR_BADARGS;
4243 	if(inc<1)return RSB_ERR_BADARGS;
4244 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4245 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4246 	{const double * ap = array;double *mp = minp;
4247 	*mp = *ap;for(i = 1;i<n;++i){if(fabs(ap[i*inc])<fabs(*mp) )*mp = ap[i*inc];
4248 	}}
4249 	else
4250 #endif
4251 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4252 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4253 	{const float * ap = array;float *mp = minp;
4254 	*mp = *ap;for(i = 1;i<n;++i){if(fabsf(ap[i*inc])<fabsf(*mp) )*mp = ap[i*inc];
4255 	}}
4256 	else
4257 #endif
4258 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4259 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4260 	{const float complex * ap = array;float complex *mp = minp;
4261 	*mp = *ap;for(i = 1;i<n;++i){if(cabsf(ap[i*inc])<cabsf(*mp) )*mp = ap[i*inc];
4262 	}}
4263 	else
4264 #endif
4265 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4266 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4267 	{const double complex * ap = array;double complex *mp = minp;
4268 	*mp = *ap;for(i = 1;i<n;++i){if(cabs(ap[i*inc])<cabs(*mp) )*mp = ap[i*inc];
4269 	}}
4270 	else
4271 #endif
4272 	return RSB_ERR_UNSUPPORTED_TYPE	;
4273 	return RSB_ERR_NO_ERROR;
4274 }
4275 
rsb__util_find_max(void * maxp,const void * array,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)4276 rsb_err_t rsb__util_find_max(void * maxp, const void * array, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
4277 {
4278 	/*!
4279 	 * \ingroup gr_vec
4280 	 *
4281 	 * \return \rsberrcodemsg
4282 	 * */
4283 	size_t i;
4284 	if(n<1)return RSB_ERR_BADARGS;
4285 	if(inc<1)return RSB_ERR_BADARGS;
4286 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4287 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4288 	{const double * ap = array;double *mp = maxp;
4289 	*mp = *ap;for(i=1;i<n;++i){if(fabs(ap[i*inc])>fabs(*mp))*mp = ap[i*inc];
4290 	}}
4291 	else
4292 #endif
4293 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4294 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4295 	{const float * ap = array;float *mp = maxp;
4296 	*mp = *ap;for(i=1;i<n;++i){if(fabsf(ap[i*inc])>fabsf(*mp))*mp = ap[i*inc];
4297 	}}
4298 	else
4299 #endif
4300 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4301 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4302 	{const float complex * ap = array;float complex *mp = maxp;
4303 	*mp = *ap;for(i=1;i<n;++i){if(cabsf(ap[i*inc])>cabsf(*mp))*mp = ap[i*inc];
4304 	}}
4305 	else
4306 #endif
4307 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4308 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4309 	{const double complex * ap = array;double complex *mp = maxp;
4310 	*mp = *ap;for(i=1;i<n;++i){if(cabs(ap[i*inc])>cabs(*mp))*mp = ap[i*inc];
4311 	}}
4312 	else
4313 #endif
4314 	return RSB_ERR_UNSUPPORTED_TYPE	;
4315 	return RSB_ERR_NO_ERROR;
4316 }
4317 
rsb__util_drop_to_zero_if_above_threshold(void * array,rsb_type_t type,size_t n,const void * threshold)4318 rsb_err_t rsb__util_drop_to_zero_if_above_threshold(void * array, rsb_type_t type, size_t n, const void * threshold)
4319 {
4320 	/*!
4321 	 * \ingroup gr_vec
4322 	 *
4323 	 * \return \rsberrcodemsg
4324 	 * */
4325 	size_t i;
4326 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4327 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4328 	{const double th = (*(const double*)(threshold)); double*ta = array;
4329 	for(i = 0;i<n;++i)
4330 	{if(fabs(th)<fabs(ta[i]))ta[i] = ((double)(0));}}
4331 	else
4332 #endif
4333 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4334 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4335 	{const float th = (*(const float*)(threshold)); float*ta = array;
4336 	for(i = 0;i<n;++i)
4337 	{if(fabsf(th)<fabsf(ta[i]))ta[i] = ((float)(0));}}
4338 	else
4339 #endif
4340 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4341 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4342 	{const float complex th = (*(const float complex*)(threshold)); float complex*ta = array;
4343 	for(i = 0;i<n;++i)
4344 	{if(cabsf(th)<cabsf(ta[i]))ta[i] = ((float complex)(0));}}
4345 	else
4346 #endif
4347 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4348 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4349 	{const double complex th = (*(const double complex*)(threshold)); double complex*ta = array;
4350 	for(i = 0;i<n;++i)
4351 	{if(cabs(th)<cabs(ta[i]))ta[i] = ((double complex)(0));}}
4352 	else
4353 #endif
4354 	return RSB_ERR_UNSUPPORTED_TYPE	;
4355 	return RSB_ERR_NO_ERROR;
4356 }
4357 
rsb__util_count_positive(void * array,rsb_type_t type,size_t n)4358 rsb_nnz_idx_t rsb__util_count_positive(void * array, rsb_type_t type, size_t n)
4359 {
4360 	/*!
4361 	 * \ingroup gr_vec
4362 	 *
4363 	 * \return \rsberrcodemsg
4364 	 * */
4365 	size_t i, c = 0;
4366 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4367 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4368 	{	double*ta = array;
4369 		 for(i=0;i<n;++i)
4370 			c+=((ta[i])>(double)0);
4371 	}else
4372 #endif
4373 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4374 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4375 	{	float*ta = array;
4376 		 for(i=0;i<n;++i)
4377 			c+=((ta[i])>(float)0);
4378 	}else
4379 #endif
4380 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4381 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4382 	{	float complex*ta = array;
4383 		 for(i=0;i<n;++i)
4384 			c+=(crealf(ta[i])>(float)0);
4385 	}else
4386 #endif
4387 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4388 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4389 	{	double complex*ta = array;
4390 		 for(i=0;i<n;++i)
4391 			c+=(creal(ta[i])>(double)0);
4392 	}else
4393 #endif
4394 	return RSB_ERR_UNSUPPORTED_TYPE	;
4395 	return c;
4396 }
4397 
rsb__util_count_negative(void * array,rsb_type_t type,size_t n)4398 rsb_nnz_idx_t rsb__util_count_negative(void * array, rsb_type_t type, size_t n)
4399 {
4400 	/*!
4401 	 * \ingroup gr_vec
4402 	 *
4403 	 * \return \rsberrcodemsg
4404 	 * */
4405 	size_t i, c = 0;
4406 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4407 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4408 	{	double*ta = array;
4409 		 for(i=0;i<n;++i)
4410 			c+=((ta[i])<(double)0);
4411 	}else
4412 #endif
4413 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4414 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4415 	{	float*ta = array;
4416 		 for(i=0;i<n;++i)
4417 			c+=((ta[i])<(float)0);
4418 	}else
4419 #endif
4420 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4421 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4422 	{	float complex*ta = array;
4423 		 for(i=0;i<n;++i)
4424 			c+=(crealf(ta[i])<(float)0);
4425 	}else
4426 #endif
4427 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4428 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4429 	{	double complex*ta = array;
4430 		 for(i=0;i<n;++i)
4431 			c+=(creal(ta[i])<(double)0);
4432 	}else
4433 #endif
4434 	return RSB_ERR_UNSUPPORTED_TYPE	;
4435 	return c;
4436 }
4437 
rsb__util_drop_to_zero_if_under_threshold(void * array,rsb_type_t type,size_t n,const void * threshold)4438 rsb_err_t rsb__util_drop_to_zero_if_under_threshold(void * array, rsb_type_t type, size_t n, const void * threshold)
4439 {
4440 	/*!
4441 	 * \ingroup gr_vec
4442 	 *
4443 	 * \return \rsberrcodemsg
4444 	 * */
4445 	size_t i;
4446 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4447 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  ) {
4448 	const double th = (*(double*)(threshold)); double*ta = ((double*)(array));
4449 	for(i=0;i<n;++i){if(fabs(th)>fabs(ta[i]))ta[i] = ((double)(0));}}
4450 	else
4451 #endif
4452 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4453 	if( type == RSB_NUMERICAL_TYPE_FLOAT  ) {
4454 	const float th = (*(float*)(threshold)); float*ta = ((float*)(array));
4455 	for(i=0;i<n;++i){if(fabsf(th)>fabsf(ta[i]))ta[i] = ((float)(0));}}
4456 	else
4457 #endif
4458 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4459 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  ) {
4460 	const float complex th = (*(float complex*)(threshold)); float complex*ta = ((float complex*)(array));
4461 	for(i=0;i<n;++i){if(cabsf(th)>cabsf(ta[i]))ta[i] = ((float complex)(0));}}
4462 	else
4463 #endif
4464 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4465 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  ) {
4466 	const double complex th = (*(double complex*)(threshold)); double complex*ta = ((double complex*)(array));
4467 	for(i=0;i<n;++i){if(cabs(th)>cabs(ta[i]))ta[i] = ((double complex)(0));}}
4468 	else
4469 #endif
4470 	return RSB_ERR_UNSUPPORTED_TYPE	;
4471 	return RSB_ERR_NO_ERROR;
4472 }
4473 
rsb__fill_with_ones(void * array,rsb_type_t type,size_t n,size_t incx)4474 rsb_err_t rsb__fill_with_ones(void * array, rsb_type_t type, size_t n, size_t incx){
4475 	/*!
4476 	 * \ingroup gr_vec
4477 	 * Will set to one the input n elements long array of type type.
4478 	 * \param array	an array pointer
4479 	 * \param type	a valid type code
4480 	 * \param n	the input array length
4481 	 *
4482 	 * \return \rsberrcodemsg
4483 	 * TODO:RENAME: rsb__fill_with_ones -> rsb__val_fill_with_ones.
4484 	 * */
4485 	size_t i;
4486 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4487 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  ){
4488 	double*ta = ((double*)(array));
4489  for(i=0;i<n;++i) {ta[i*incx] = ((double)(1.0));}}
4490 	else
4491 #endif
4492 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4493 	if( type == RSB_NUMERICAL_TYPE_FLOAT  ){
4494 	float*ta = ((float*)(array));
4495  for(i=0;i<n;++i) {ta[i*incx] = ((float)(1.0));}}
4496 	else
4497 #endif
4498 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4499 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  ){
4500 	float complex*ta = ((float complex*)(array));
4501  for(i=0;i<n;++i) {ta[i*incx] = ((float complex)(1.0));}}
4502 	else
4503 #endif
4504 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4505 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  ){
4506 	double complex*ta = ((double complex*)(array));
4507  for(i=0;i<n;++i) {ta[i*incx] = ((double complex)(1.0));}}
4508 	else
4509 #endif
4510 	return RSB_ERR_UNSUPPORTED_TYPE	;
4511 	return RSB_ERR_NO_ERROR;
4512 }
4513 
rsb__debug_print_vectors_diff(const void * v1,const void * v2,size_t n,rsb_type_t type,size_t incx,size_t incy,int onlyfirst)4514 rsb_err_t rsb__debug_print_vectors_diff(const void * v1, const void * v2, size_t n, rsb_type_t type, size_t incx, size_t incy, int onlyfirst){
4515 	/*!
4516 	 * A debug function for printing the difference of two vectors of a specified type, in parallel.
4517 	 * FIXME : It should take into account thresholds specific to each numerical type.
4518 	 **/
4519 #if RSB_ALLOW_STDOUT
4520 	size_t i, differing = 0;
4521 	if(!v1 || !v2)return RSB_ERR_BADARGS;
4522 
4523 	RSB_STDERR("\t vectors diff :\n");
4524 
4525 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4526 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4527 	{
4528 		const double *v1p = v1,*v2p = v2; double th = 0.0001;
4529 		for(i=0;i<n ;++i)
4530 										if(fabs((double)(v1p[i*incx]-v2p[i*incy]))>th)/*FIXME : incomplete check*/
4531 {		differing++;
4532 		if((onlyfirst==0)||(onlyfirst>differing))
4533 		RSB_STDOUT("%zd : "RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING" "RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING"\n",(rsb_printf_int_t)i,						v1p[i*incx],v2p[i*incy]		);
4534 }
4535 	}
4536 	else
4537 #endif
4538 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4539 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4540 	{
4541 		const float *v1p = v1,*v2p = v2; float th = 0.0001;
4542 		for(i=0;i<n ;++i)
4543 										if(fabs((double)(v1p[i*incx]-v2p[i*incy]))>th)/*FIXME : incomplete check*/
4544 {		differing++;
4545 		if((onlyfirst==0)||(onlyfirst>differing))
4546 		RSB_STDOUT("%zd : "RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING" "RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING"\n",(rsb_printf_int_t)i,						v1p[i*incx],v2p[i*incy]		);
4547 }
4548 	}
4549 	else
4550 #endif
4551 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4552 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4553 	{
4554 		const float complex *v1p = v1,*v2p = v2; float th = 0.0001;
4555 		for(i=0;i<n ;++i)
4556 				if(crealf(v1p[i*incx])-crealf(v2p[i*incy])>th)/*FIXME : incomplete check*/{		differing++;
4557 		if((onlyfirst==0)||(onlyfirst>differing))
4558 		RSB_STDOUT("%zd : "RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING" "RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING"\n",(rsb_printf_int_t)i,				crealf(v1p[i*incx]),cimagf(v1p[i*incx]),crealf(v2p[i*incy]),cimagf(v2p[i*incy])		);
4559 }
4560 	}
4561 	else
4562 #endif
4563 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4564 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4565 	{
4566 		const double complex *v1p = v1,*v2p = v2; double th = 0.0001;
4567 		for(i=0;i<n ;++i)
4568 		if(creal(v1p[i*incx])-creal(v2p[i*incy])>th)/*FIXME : incomplete check*/{		differing++;
4569 		if((onlyfirst==0)||(onlyfirst>differing))
4570 		RSB_STDOUT("%zd : "RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING" "RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING"\n",(rsb_printf_int_t)i,		creal(v1p[i*incx]),cimag(v1p[i*incx]),creal(v2p[i*incy]),cimag(v2p[i*incy])		);
4571 }
4572 	}
4573 	else
4574 #endif
4575 	return RSB_ERR_UNSUPPORTED_TYPE	;
4576 	if(differing>onlyfirst)RSB_STDOUT("...(for a total of %zd differing entries)...\n",(rsb_printf_int_t)(differing-onlyfirst));
4577 	return RSB_ERR_NO_ERROR;
4578 #else
4579 	return RSB_ERR_UNSUPPORTED_FEATURE;
4580 #endif
4581 }
4582 
rsb__debug_print_value(const void * v,rsb_type_t type)4583 rsb_err_t rsb__debug_print_value(const void * v, rsb_type_t type){
4584 	/*!
4585 	 **/
4586 #if RSB_ALLOW_STDOUT
4587 	if(!v)return RSB_ERR_BADARGS;
4588 
4589 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4590 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4591 	{
4592 		const double *v1p = v;
4593 		RSB_STDOUT(RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING,						v1p[0]		);
4594 	}
4595 	else
4596 #endif
4597 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4598 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4599 	{
4600 		const float *v1p = v;
4601 		RSB_STDOUT(RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING,						v1p[0]		);
4602 	}
4603 	else
4604 #endif
4605 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4606 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4607 	{
4608 		const float complex *v1p = v;
4609 		RSB_STDOUT(RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING,				crealf(v1p[0]),cimagf(v1p[0])		);
4610 	}
4611 	else
4612 #endif
4613 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4614 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4615 	{
4616 		const double complex *v1p = v;
4617 		RSB_STDOUT(RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING,		creal(v1p[0]),cimag(v1p[0])		);
4618 	}
4619 	else
4620 #endif
4621 	return RSB_ERR_UNSUPPORTED_TYPE	;
4622 	return RSB_ERR_NO_ERROR;
4623 #else
4624 	return RSB_ERR_UNSUPPORTED_FEATURE;
4625 #endif
4626 }
4627 
rsb__debug_print_vector_extra(const void * v1,size_t n,rsb_type_t type,size_t inc,int style,FILE * stream)4628 rsb_err_t rsb__debug_print_vector_extra(const void * v1, size_t n, rsb_type_t type, size_t inc, int style, FILE*stream){
4629 	/*!
4630 	 * A debug function for printing two vectors of a specified type, in parallel.
4631 	 **/
4632 #if RSB_ALLOW_STDOUT
4633 	rsb_nnz_idx_t i;
4634 	int want_header = ( style == 0x1 );
4635 	const char * ts = RSB_IS_MATRIX_TYPE_COMPLEX(type)?"complex":"real";
4636 	const char * ss = RSB_SYMMETRY_STRING(RSB_FLAG_NOFLAGS);
4637 
4638 	if( n < 0 )
4639 		goto errb;
4640 
4641 	if(!v1 || !stream)
4642 		goto errb;
4643 
4644 	/*if(!want_header)
4645 		RSB_STDERR("\t vectors  :\n");*/
4646 
4647 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4648 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4649 	{
4650 		const double *v1p = v1;
4651 		if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix array %s %s\n%zd %zd\n",ts,ss,(rsb_printf_int_t)n,(rsb_printf_int_t)1);
4652 		for(i=0;i<n;++i)
4653 		RSB_FPRINTF(stream,RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING "\n",						v1p[i*inc]		);
4654 	}
4655 	else
4656 #endif
4657 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4658 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4659 	{
4660 		const float *v1p = v1;
4661 		if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix array %s %s\n%zd %zd\n",ts,ss,(rsb_printf_int_t)n,(rsb_printf_int_t)1);
4662 		for(i=0;i<n;++i)
4663 		RSB_FPRINTF(stream,RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING "\n",						v1p[i*inc]		);
4664 	}
4665 	else
4666 #endif
4667 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4668 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4669 	{
4670 		const float complex *v1p = v1;
4671 		if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix array %s %s\n%zd %zd\n",ts,ss,(rsb_printf_int_t)n,(rsb_printf_int_t)1);
4672 		for(i=0;i<n;++i)
4673 		RSB_FPRINTF(stream,RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING "\n",				crealf(v1p[i*inc]),cimagf(v1p[i*inc])		);
4674 	}
4675 	else
4676 #endif
4677 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4678 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4679 	{
4680 		const double complex *v1p = v1;
4681 		if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix array %s %s\n%zd %zd\n",ts,ss,(rsb_printf_int_t)n,(rsb_printf_int_t)1);
4682 		for(i=0;i<n;++i)
4683 		RSB_FPRINTF(stream,RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING "\n",		creal(v1p[i*inc]),cimag(v1p[i*inc])		);
4684 	}
4685 	else
4686 #endif
4687 	return RSB_ERR_UNSUPPORTED_TYPE	;
4688 	return RSB_ERR_NO_ERROR;
4689 #else
4690 	return RSB_ERR_UNSUPPORTED_FEATURE;
4691 #endif
4692 errb:
4693 	return RSB_ERR_BADARGS;
4694 }
4695 
rsb__debug_print_vector(const void * v1,size_t n,rsb_type_t type,size_t inc)4696 rsb_err_t rsb__debug_print_vector(const void * v1, size_t n, rsb_type_t type, size_t inc){
4697 	return rsb__debug_print_vector_extra(v1, n, type, inc, 0x0, stdout);
4698 }
4699 
rsb__debug_print_vectors(const void * v1,const void * v2,size_t n,size_t incx,size_t incy,rsb_type_t type)4700 rsb_err_t rsb__debug_print_vectors(const void * v1, const void * v2, size_t n, size_t incx, size_t incy, rsb_type_t type){
4701 	/*!
4702 	 * A debug function for printing two vectors of a specified type, in parallel.
4703 	 **/
4704 #if RSB_ALLOW_STDOUT
4705 	size_t i;
4706 	if(!v1 || !v2)return RSB_ERR_BADARGS;
4707 
4708 	RSB_STDERR("\t vectors  :\n");
4709 
4710 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4711 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
4712 	{
4713 		const double *v1p = v1,*v2p = v2;
4714 		for(i=0;i<n;++i)
4715 		RSB_STDOUT(RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING" "RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING"\n",v1p[(i)*incx],v2p[(i)*incy]);
4716 	}
4717 	else
4718 #endif
4719 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4720 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
4721 	{
4722 		const float *v1p = v1,*v2p = v2;
4723 		for(i=0;i<n;++i)
4724 		RSB_STDOUT(RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING" "RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING"\n",v1p[(i)*incx],v2p[(i)*incy]);
4725 	}
4726 	else
4727 #endif
4728 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4729 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
4730 	{
4731 		const float complex *v1p = v1,*v2p = v2;
4732 		for(i=0;i<n;++i)
4733 		RSB_STDOUT(RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING" "RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING"\n",crealf(v1p[(i)*incx]),cimagf(v1p[(i)*incx]),crealf(v2p[(i)*incy]),cimagf(v2p[(i)*incy]));
4734 	}
4735 	else
4736 #endif
4737 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4738 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
4739 	{
4740 		const double complex *v1p = v1,*v2p = v2;
4741 		for(i=0;i<n;++i)
4742 		RSB_STDOUT(RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING" "RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING"\n",creal(v1p[(i)*incx]),cimag(v1p[(i)*incx]),creal(v2p[(i)*incy]),cimag(v2p[(i)*incy]));
4743 	}
4744 	else
4745 #endif
4746 	return RSB_ERR_UNSUPPORTED_TYPE	;
4747 	return RSB_ERR_NO_ERROR;
4748 #else
4749 	return RSB_ERR_UNSUPPORTED_FEATURE;
4750 #endif
4751 }
4752 
4753 
rsb__do_account_sorted_optimized_css(const rsb_coo_idx_t * MIndx,const rsb_coo_idx_t * mIndx,const rsb_coo_idx_t Mdim,const rsb_coo_idx_t mdim,const rsb_nnz_idx_t nnz,rsb_nnz_idx_t * elements_per_block_row,rsb_nnz_idx_t * blocks_per_block_row)4754 rsb_err_t rsb__do_account_sorted_optimized_css(
4755 	 const rsb_coo_idx_t * MIndx, const rsb_coo_idx_t * mIndx,
4756 	 const rsb_coo_idx_t Mdim, const rsb_coo_idx_t mdim,
4757 	 const rsb_nnz_idx_t nnz, rsb_nnz_idx_t * elements_per_block_row, rsb_nnz_idx_t * blocks_per_block_row
4758 )
4759 {
4760 	/**
4761 	 	\ingroup gr_internals
4762 
4763 		elements_per_block_row and blocks_per_block_row arrays should be blanked.
4764 		FIXME : missing error handling.
4765 	*/
4766 	rsb_err_t errval = RSB_ERR_NO_ERROR;
4767 	rsb_nnz_idx_t n = 0;
4768 
4769 	if(blocks_per_block_row)
4770 	for(n=0;n<nnz;++n)
4771 	{
4772 		RSB_DEBUG_ASSERT(MIndx[n]<Mdim);
4773 		RSB_DEBUG_ASSERT(MIndx[n]>=0);
4774 		elements_per_block_row[MIndx[n]]++;
4775 		blocks_per_block_row  [MIndx[n]]++;
4776 	}
4777 	else
4778 	for(n=0;n<nnz;++n)
4779 	{
4780 		RSB_DEBUG_ASSERT(MIndx[n]<Mdim);
4781 		RSB_DEBUG_ASSERT(MIndx[n]>=0);
4782 		elements_per_block_row[MIndx[n]]++;
4783 	}
4784 	RSB_DO_ERR_RETURN(errval)
4785 }
4786 
rsb__do_account_sorted_optimized(struct rsb_mtx_t * mtxAp,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_coo_idx_t Idim,const rsb_coo_idx_t Jdim,const rsb_nnz_idx_t nnz,const struct rsb_mtx_partitioning_info_t * pinfop,rsb_nnz_idx_t * elements_per_block_row,rsb_nnz_idx_t * blocks_per_block_row)4787 rsb_err_t rsb__do_account_sorted_optimized(
4788 	 struct rsb_mtx_t * mtxAp,
4789 	 const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA,
4790 	 const rsb_coo_idx_t Idim, const rsb_coo_idx_t Jdim,
4791 	 const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop,
4792 rsb_nnz_idx_t * elements_per_block_row,
4793 rsb_nnz_idx_t * blocks_per_block_row
4794 )
4795 {
4796 	/**
4797 	 *	\ingroup gr_internals
4798 	 * 	FIXME : document this
4799 	 */
4800 	rsb_coo_idx_t blockrows = 0;
4801 	rsb_coo_idx_t blockcolumns = 0;
4802 	rsb_coo_idx_t baserow = 0;
4803 	rsb_coo_idx_t basecolumn = 0;
4804 	const rsb_coo_idx_t *Mpntr = NULL;
4805 	const rsb_coo_idx_t *mpntr = NULL;
4806 	const rsb_coo_idx_t *MIndx = NULL;
4807 	const rsb_coo_idx_t *mIndx = NULL;
4808 	rsb_blk_idx_t mI = 0, MI = 0;
4809 	rsb_err_t errval = RSB_ERR_NO_ERROR;
4810 	rsb_nnz_idx_t k = 0;	/* will index a nnz sized array */
4811 	int K = 0;
4812 
4813 	if(0)
4814 	//if( flags & RSB_FLAG_SHOULD_DEBUG )
4815 		errval = rsb__do_account_sorted( mtxAp, IA, JA, nnz, pinfop, elements_per_block_row, blocks_per_block_row);
4816 
4817 	if(nnz==0)
4818 	{
4819 		/* FIXME: new case, incomplete (useful for diagonal implicit matrices) */
4820 		return RSB_ERR_NO_ERROR;
4821 	}
4822 
4823 #if RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
4824 	if(!pinfop)
4825 	{
4826 		/* a performance fix */
4827 		if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
4828 			return rsb__do_account_sorted_optimized_css(JA,IA,Jdim,Idim,nnz,elements_per_block_row,blocks_per_block_row);
4829 		else
4830 			return rsb__do_account_sorted_optimized_css(IA,JA,Idim,Jdim,nnz,elements_per_block_row,blocks_per_block_row);
4831 	}
4832 #endif
4833 
4834 	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
4835 	{
4836 		mpntr = pinfop->rpntr;
4837 		Mpntr = pinfop->cpntr;
4838 		mIndx = IA;
4839 		MIndx = JA;
4840 	}
4841 	else
4842 	{
4843 		Mpntr = pinfop->rpntr;
4844 		mpntr = pinfop->cpntr;
4845 		MIndx = IA;
4846 		mIndx = JA;
4847 	}
4848 
4849 	/*	storage BCOR	*/
4850 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
4851 {
4852 	k = mI = MI = 0;K = 0;
4853 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
4854 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
4855 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
4856 	RSB_ASSERT( blockrows && blockcolumns);
4857 #else
4858 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
4859 	blockcolumns = mpntr[mI+1] - mpntr[mI];
4860 #endif
4861 
4862 
4863 	k = mI = MI = K=0;
4864 	while( MIndx[k] >= Mpntr[MI+1] )++MI;	/* skipping preceding block rows .. */
4865 	while( mIndx[k] >= mpntr[mI+1] )++mI;	/* skipping preceding block columns .. */
4866 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
4867 	blockcolumns = mpntr[mI+1] - mpntr[mI];
4868 	baserow = Mpntr[MI];
4869 	basecolumn = mpntr[mI];
4870 	*elements_per_block_row = 0;
4871 	*blocks_per_block_row   = 0;
4872 	elements_per_block_row[MI*0] += blockrows * blockcolumns;
4873 	blocks_per_block_row[MI]   +=1;
4874 
4875 	while(RSB_LIKELY(k<nnz))
4876 	{
4877 #ifdef DEBUG
4878 		if( MIndx[k] < baserow  )
4879 		{
4880 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k,(rsb_printf_int_t) (MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
4881 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
4882 			errval = RSB_ERR_INTERNAL_ERROR;
4883 			goto err;
4884 		}
4885 #endif
4886 
4887 		if( mIndx[k] >= basecolumn+blockcolumns  )
4888 		{
4889 			/* new block column, for sure */
4890 
4891 			while( mIndx[k] >= mpntr[mI+1] )++mI;
4892 			blockcolumns = mpntr[mI+1] - mpntr[mI];
4893 			basecolumn = mpntr[mI];
4894 
4895 			if( MIndx[k] >= baserow+blockrows  )
4896 			{
4897 				/* new block row  */
4898 
4899 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
4900 				blockrows    = Mpntr[MI+1] - Mpntr[MI];
4901 				baserow = Mpntr[MI];
4902 			}
4903 			else
4904 			{
4905 				/* same block row  */
4906 			}
4907 			*elements_per_block_row += blockrows * blockcolumns;
4908 			blocks_per_block_row[MI]   +=1;
4909 			++K;
4910 		}
4911 		else
4912 		if( MIndx[k] >= baserow+blockrows  )
4913 		{
4914 			/* new row block, for sure */
4915 
4916 			while( MIndx[k] >= Mpntr[MI+1] )++MI;
4917 			blockrows    = Mpntr[MI+1] - Mpntr[MI];
4918 			baserow = Mpntr[MI];
4919 
4920 			if( mIndx[k] < basecolumn  )
4921 			{
4922 				/* new row block, new block column  */
4923 
4924 				mI = 0;
4925 				while( mIndx[k] >= mpntr[mI+1] )++mI;
4926 				blockcolumns = mpntr[mI+1] - mpntr[mI];
4927 				basecolumn = mpntr[mI];
4928 			}
4929 			else
4930 			{
4931 				/* new row block, same column  */
4932 			}
4933 			/* get rid of this var : elements_per_block_row */
4934 			*elements_per_block_row += blockrows * blockcolumns;
4935 			blocks_per_block_row[MI]   +=1;
4936 			++K;
4937 		}
4938 		else
4939 		{
4940 			/* same block row for sure */
4941 		}
4942 		++k;
4943 	}
4944 	errval = RSB_ERR_NO_ERROR;goto ret;
4945 	}
4946 	/*	storage BCSR	*/
4947 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
4948 {
4949 	k = mI = MI = 0;K = 0;
4950 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
4951 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
4952 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
4953 	RSB_ASSERT( blockrows && blockcolumns);
4954 #else
4955 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
4956 	blockcolumns = mpntr[mI+1] - mpntr[mI];
4957 #endif
4958 
4959 
4960 	k = mI = MI = K=0;
4961 	while( MIndx[k] >= (blockrows   *(MI+1)) )++MI;	/* skipping preceding block rows .. */
4962 	while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI;	/* skipping preceding block columns .. */
4963 	blockrows    = (blockrows   *(MI+1)) - (blockrows   *(MI));
4964 	blockcolumns = (blockcolumns*(mI+1)) - (blockcolumns*(mI));
4965 	baserow = (blockrows   *(MI));
4966 	basecolumn = (blockcolumns*(mI));
4967 	*elements_per_block_row = 0;
4968 	*blocks_per_block_row   = 0;
4969 	elements_per_block_row[MI*0] += blockrows * blockcolumns;
4970 	blocks_per_block_row[MI]   +=1;
4971 
4972 	while(RSB_LIKELY(k<nnz))
4973 	{
4974 #ifdef DEBUG
4975 		if( MIndx[k] < baserow  )
4976 		{
4977 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k,(rsb_printf_int_t) (MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
4978 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
4979 			errval = RSB_ERR_INTERNAL_ERROR;
4980 			goto err;
4981 		}
4982 #endif
4983 
4984 		if( mIndx[k] >= basecolumn+blockcolumns  )
4985 		{
4986 			/* new block column, for sure */
4987 			mI = mIndx[k]/blockcolumns;
4988 			basecolumn = (blockcolumns*(mI));
4989 
4990 			if( MIndx[k] >= baserow+blockrows  )
4991 			{
4992 				/* new block row  */
4993 				MI = MIndx[k]/blockrows;
4994 				baserow = (blockrows   *(MI));
4995 			}
4996 			else
4997 			{
4998 				/* same block row  */
4999 			}
5000 			*elements_per_block_row += blockrows * blockcolumns;
5001 			blocks_per_block_row[MI]   +=1;
5002 			++K;
5003 		}
5004 		else
5005 		if( MIndx[k] >= baserow+blockrows  )
5006 		{
5007 			/* new row block, for sure */
5008 			MI = MIndx[k]/blockrows;
5009 			baserow = (blockrows   *(MI));
5010 
5011 			if( mIndx[k] < basecolumn  )
5012 			{
5013 				/* new row block, new block column  */
5014 				mI = mIndx[k]/blockcolumns;
5015 				basecolumn = (blockcolumns*(mI));
5016 			}
5017 			else
5018 			{
5019 				/* new row block, same column  */
5020 			}
5021 			/* get rid of this var : elements_per_block_row */
5022 			*elements_per_block_row += blockrows * blockcolumns;
5023 			blocks_per_block_row[MI]   +=1;
5024 			++K;
5025 		}
5026 		else
5027 		{
5028 			/* same block row for sure */
5029 		}
5030 		++k;
5031 	}
5032 	errval = RSB_ERR_NO_ERROR;goto ret;
5033 	}
5034 	errval = RSB_ERR_INTERNAL_ERROR;
5035 ret:	return errval;
5036 }
5037 
rsb__do_insert_sorted_optimized_css(struct rsb_mtx_t * mtxAp,const void * VA,const rsb_coo_idx_t * MIndx,const rsb_coo_idx_t * mIndx,const rsb_nnz_idx_t nnz)5038 rsb_err_t rsb__do_insert_sorted_optimized_css( struct rsb_mtx_t * mtxAp, const void *VA, const rsb_coo_idx_t * MIndx, const rsb_coo_idx_t * mIndx, const rsb_nnz_idx_t nnz)
5039 {
5040 	/**
5041 	 	\ingroup gr_internals
5042 
5043 		elements_per_block_row and blocks_per_block_row arrays should be blanked.
5044 		FIXME : missing error handling.
5045 	*/
5046 	rsb_err_t errval = RSB_ERR_NO_ERROR;
5047 	rsb_nnz_idx_t n = 0;
5048 
5049 	/* in case of RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR, they are equal */
5050 	if(mtxAp->VA != VA)
5051 		rsb__memcpy(mtxAp->VA  ,VA  ,mtxAp->el_size*nnz);
5052 
5053 	for(n=0;n<nnz+1;++n)
5054 		mtxAp->indptr[n] = n;
5055 
5056 	for(n=0;n<mtxAp->nnz;++n)
5057 		mtxAp->bindx [n] = mIndx[n];
5058 	mtxAp->bindx [nnz] = 0;
5059 
5060 	// should also set bindx, indptr,
5061 	RSB_DO_ERR_RETURN(errval)
5062 }
5063 
rsb__do_insert_sorted_optimized(struct rsb_mtx_t * mtxAp,const void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,const struct rsb_mtx_partitioning_info_t * pinfop)5064 rsb_err_t rsb__do_insert_sorted_optimized( struct rsb_mtx_t * mtxAp, const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop)
5065 {
5066 	/*
5067 	 *	FIXME ! UNFINISHED
5068 	 * 	and please note that linked format is incomplete, so it does not support well block column major
5069 	 */
5070 	rsb_coo_idx_t blockrows = 0;
5071 	rsb_coo_idx_t blockcolumns = 0;
5072 	rsb_coo_idx_t baserow = 0;
5073 	rsb_coo_idx_t basecolumn = 0;
5074 	rsb_nnz_idx_t *indptr = mtxAp->indptr;
5075 	rsb_nnz_idx_t *bindx = mtxAp->bindx;
5076 	const rsb_coo_idx_t *Mpntr = NULL;
5077 	const rsb_coo_idx_t *mpntr = NULL;
5078 	const rsb_coo_idx_t *MIndx = NULL;
5079 	const rsb_coo_idx_t *mIndx = NULL;
5080 	rsb_blk_idx_t mI = 0, MI = 0;
5081 	rsb_err_t errval = RSB_ERR_NO_ERROR;
5082 	rsb_nnz_idx_t k = 0;	/* will index a nnz sized array */
5083 	rsb_nnz_idx_t K = 0;
5084 
5085 	if(nnz==0)
5086 	{
5087 		/* FIXME: new case, incomplete (useful for diagonal implicit matrices) */
5088 		K = 0;		/* if nnz == 0 then K == 0 */
5089 		bindx[K] = 0;	// the first element off the working bindx should be set to a safe value
5090 		return RSB_ERR_NO_ERROR;
5091 	}
5092 
5093 	if(0)
5094 		return rsb__do_insert_sorted( mtxAp, VA, IA, JA, nnz, pinfop);
5095 
5096 #if RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
5097 	if(!pinfop)
5098 	{
5099 		/* a performance fix */
5100 		if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
5101 			return rsb__do_insert_sorted_optimized_css( mtxAp, VA, JA, IA, nnz );
5102 		else
5103 			return rsb__do_insert_sorted_optimized_css( mtxAp, VA, IA, JA, nnz );
5104 	}
5105 #endif
5106 
5107 	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
5108 	{
5109 		mpntr = pinfop->rpntr;
5110 		Mpntr = pinfop->cpntr;
5111 		mIndx = IA;
5112 		MIndx = JA;
5113 	}
5114 	else
5115 	{
5116 		Mpntr = pinfop->rpntr;
5117 		mpntr = pinfop->cpntr;
5118 		MIndx = IA;
5119 		mIndx = JA;
5120 	}
5121 
5122 
5123 	/*	type double, storage BCOR	*/
5124 	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
5125 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
5126 {
5127 	double * dst = mtxAp->VA;
5128 	k = mI = MI = 0;K = 0;
5129 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5130 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5131 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5132 	RSB_ASSERT( blockrows && blockcolumns);
5133 #else
5134 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
5135 	blockcolumns = mpntr[mI+1] - mpntr[mI];
5136 #endif
5137 
5138 	while( MIndx[k] >= Mpntr[MI+1] )++MI;	/* skipping preceding block rows .. */
5139 	while( mIndx[k] >= mpntr[mI+1] )++mI;	/* skipping preceding block columns .. */
5140 	baserow = Mpntr[MI];
5141 	basecolumn = mpntr[mI];
5142 	bindx [ K ] = mI;			/* a new block */
5143 	indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;	/* FIXME : DUPLICATION ?! see later */
5144 
5145 
5146 
5147 	if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5148 	{
5149 		//RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5150 	}
5151 
5152 while(RSB_LIKELY(k<nnz))
5153 	{
5154 #ifdef DEBUG
5155 		if( MIndx[k] < baserow  )
5156 		{
5157 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5158 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5159 			errval = RSB_ERR_INTERNAL_ERROR;
5160 			goto err;/* NOTE : this jump could be evil */
5161 		}
5162 #endif
5163 
5164 		if( mIndx[k] >= basecolumn+blockcolumns  )
5165 		{
5166 			/* new block column, for sure */
5167 
5168 			while( mIndx[k] >= mpntr[mI+1] )++mI;
5169 			blockcolumns = mpntr[mI+1] - mpntr[mI];
5170 			basecolumn = mpntr[mI];
5171 
5172 			if( MIndx[k] >= baserow+blockrows  )
5173 			{
5174 				/* new block row  */
5175 
5176 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
5177 				blockrows    = Mpntr[MI+1] - Mpntr[MI];
5178 				baserow = Mpntr[MI];
5179 			}
5180 			else
5181 			{
5182 				/* same block row  */
5183 			}
5184 			++K;
5185 			bindx [ K ] = mI;			/* a new block */
5186 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5187 		}
5188 		else
5189 		if( MIndx[k] >= baserow+blockrows  )
5190 		{
5191 			/* new row block, for sure */
5192 
5193 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
5194 				blockrows    = Mpntr[MI+1] - Mpntr[MI];
5195 				baserow = Mpntr[MI];
5196 
5197 			if( mIndx[k] < basecolumn  )
5198 			{
5199 				/* new row block, new block column  */
5200 				mI = 0;
5201 
5202 				while( mIndx[k] >= mpntr[mI+1] )++mI;
5203 				blockcolumns = mpntr[mI+1] - mpntr[mI];
5204 				basecolumn = mpntr[mI];
5205 			}
5206 			else
5207 			{
5208 				/* new row block, same column  */
5209 			}
5210 			++K;
5211 			bindx [ K ] = mI;			/* a new block */
5212 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5213 		}
5214 		else
5215 		{
5216 			/* same block row for sure */
5217 		}
5218 		dst =  mtxAp->VA;
5219                 RSB_DEBUG_ASSERT(mI>=0);
5220                 RSB_DEBUG_ASSERT(MI>=0);
5221 
5222 
5223 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5224 		dst +=
5225 	indptr[ K ]
5226 	/*K * blockrows * blockcolumns*/
5227 	/*RSB_BLOCK_OFFSET(mtxAp,K)/mtxAp->el_size*/ /* FIXME : unfinished ! */
5228 ;
5229 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5230 		dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5231 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5232 		dst[0] = ((const double*)VA)[k];
5233 		++k;
5234 	}
5235 	if(nnz)++K;	/* if nnz == 0 then K = 0 */
5236 	bindx[K] = 0;	// the first element off the working bindx should be set to a safe value
5237 	return RSB_ERR_NO_ERROR;	/* FIXME ! */
5238 }
5239 	/*	type float, storage BCOR	*/
5240 	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT  )
5241 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
5242 {
5243 	float * dst = mtxAp->VA;
5244 	k = mI = MI = 0;K = 0;
5245 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5246 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5247 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5248 	RSB_ASSERT( blockrows && blockcolumns);
5249 #else
5250 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
5251 	blockcolumns = mpntr[mI+1] - mpntr[mI];
5252 #endif
5253 
5254 	while( MIndx[k] >= Mpntr[MI+1] )++MI;	/* skipping preceding block rows .. */
5255 	while( mIndx[k] >= mpntr[mI+1] )++mI;	/* skipping preceding block columns .. */
5256 	baserow = Mpntr[MI];
5257 	basecolumn = mpntr[mI];
5258 	bindx [ K ] = mI;			/* a new block */
5259 	indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;	/* FIXME : DUPLICATION ?! see later */
5260 
5261 
5262 
5263 	if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5264 	{
5265 		//RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5266 	}
5267 
5268 while(RSB_LIKELY(k<nnz))
5269 	{
5270 #ifdef DEBUG
5271 		if( MIndx[k] < baserow  )
5272 		{
5273 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5274 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5275 			errval = RSB_ERR_INTERNAL_ERROR;
5276 			goto err;/* NOTE : this jump could be evil */
5277 		}
5278 #endif
5279 
5280 		if( mIndx[k] >= basecolumn+blockcolumns  )
5281 		{
5282 			/* new block column, for sure */
5283 
5284 			while( mIndx[k] >= mpntr[mI+1] )++mI;
5285 			blockcolumns = mpntr[mI+1] - mpntr[mI];
5286 			basecolumn = mpntr[mI];
5287 
5288 			if( MIndx[k] >= baserow+blockrows  )
5289 			{
5290 				/* new block row  */
5291 
5292 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
5293 				blockrows    = Mpntr[MI+1] - Mpntr[MI];
5294 				baserow = Mpntr[MI];
5295 			}
5296 			else
5297 			{
5298 				/* same block row  */
5299 			}
5300 			++K;
5301 			bindx [ K ] = mI;			/* a new block */
5302 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5303 		}
5304 		else
5305 		if( MIndx[k] >= baserow+blockrows  )
5306 		{
5307 			/* new row block, for sure */
5308 
5309 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
5310 				blockrows    = Mpntr[MI+1] - Mpntr[MI];
5311 				baserow = Mpntr[MI];
5312 
5313 			if( mIndx[k] < basecolumn  )
5314 			{
5315 				/* new row block, new block column  */
5316 				mI = 0;
5317 
5318 				while( mIndx[k] >= mpntr[mI+1] )++mI;
5319 				blockcolumns = mpntr[mI+1] - mpntr[mI];
5320 				basecolumn = mpntr[mI];
5321 			}
5322 			else
5323 			{
5324 				/* new row block, same column  */
5325 			}
5326 			++K;
5327 			bindx [ K ] = mI;			/* a new block */
5328 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5329 		}
5330 		else
5331 		{
5332 			/* same block row for sure */
5333 		}
5334 		dst =  mtxAp->VA;
5335                 RSB_DEBUG_ASSERT(mI>=0);
5336                 RSB_DEBUG_ASSERT(MI>=0);
5337 
5338 
5339 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5340 		dst +=
5341 	indptr[ K ]
5342 	/*K * blockrows * blockcolumns*/
5343 	/*RSB_BLOCK_OFFSET(mtxAp,K)/mtxAp->el_size*/ /* FIXME : unfinished ! */
5344 ;
5345 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5346 		dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5347 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5348 		dst[0] = ((const float*)VA)[k];
5349 		++k;
5350 	}
5351 	if(nnz)++K;	/* if nnz == 0 then K = 0 */
5352 	bindx[K] = 0;	// the first element off the working bindx should be set to a safe value
5353 	return RSB_ERR_NO_ERROR;	/* FIXME ! */
5354 }
5355 	/*	type float complex, storage BCOR	*/
5356 	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
5357 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
5358 {
5359 	float complex * dst = mtxAp->VA;
5360 	k = mI = MI = 0;K = 0;
5361 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5362 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5363 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5364 	RSB_ASSERT( blockrows && blockcolumns);
5365 #else
5366 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
5367 	blockcolumns = mpntr[mI+1] - mpntr[mI];
5368 #endif
5369 
5370 	while( MIndx[k] >= Mpntr[MI+1] )++MI;	/* skipping preceding block rows .. */
5371 	while( mIndx[k] >= mpntr[mI+1] )++mI;	/* skipping preceding block columns .. */
5372 	baserow = Mpntr[MI];
5373 	basecolumn = mpntr[mI];
5374 	bindx [ K ] = mI;			/* a new block */
5375 	indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;	/* FIXME : DUPLICATION ?! see later */
5376 
5377 
5378 
5379 	if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5380 	{
5381 		//RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5382 	}
5383 
5384 while(RSB_LIKELY(k<nnz))
5385 	{
5386 #ifdef DEBUG
5387 		if( MIndx[k] < baserow  )
5388 		{
5389 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5390 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5391 			errval = RSB_ERR_INTERNAL_ERROR;
5392 			goto err;/* NOTE : this jump could be evil */
5393 		}
5394 #endif
5395 
5396 		if( mIndx[k] >= basecolumn+blockcolumns  )
5397 		{
5398 			/* new block column, for sure */
5399 
5400 			while( mIndx[k] >= mpntr[mI+1] )++mI;
5401 			blockcolumns = mpntr[mI+1] - mpntr[mI];
5402 			basecolumn = mpntr[mI];
5403 
5404 			if( MIndx[k] >= baserow+blockrows  )
5405 			{
5406 				/* new block row  */
5407 
5408 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
5409 				blockrows    = Mpntr[MI+1] - Mpntr[MI];
5410 				baserow = Mpntr[MI];
5411 			}
5412 			else
5413 			{
5414 				/* same block row  */
5415 			}
5416 			++K;
5417 			bindx [ K ] = mI;			/* a new block */
5418 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5419 		}
5420 		else
5421 		if( MIndx[k] >= baserow+blockrows  )
5422 		{
5423 			/* new row block, for sure */
5424 
5425 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
5426 				blockrows    = Mpntr[MI+1] - Mpntr[MI];
5427 				baserow = Mpntr[MI];
5428 
5429 			if( mIndx[k] < basecolumn  )
5430 			{
5431 				/* new row block, new block column  */
5432 				mI = 0;
5433 
5434 				while( mIndx[k] >= mpntr[mI+1] )++mI;
5435 				blockcolumns = mpntr[mI+1] - mpntr[mI];
5436 				basecolumn = mpntr[mI];
5437 			}
5438 			else
5439 			{
5440 				/* new row block, same column  */
5441 			}
5442 			++K;
5443 			bindx [ K ] = mI;			/* a new block */
5444 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5445 		}
5446 		else
5447 		{
5448 			/* same block row for sure */
5449 		}
5450 		dst =  mtxAp->VA;
5451                 RSB_DEBUG_ASSERT(mI>=0);
5452                 RSB_DEBUG_ASSERT(MI>=0);
5453 
5454 
5455 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5456 		dst +=
5457 	indptr[ K ]
5458 	/*K * blockrows * blockcolumns*/
5459 	/*RSB_BLOCK_OFFSET(mtxAp,K)/mtxAp->el_size*/ /* FIXME : unfinished ! */
5460 ;
5461 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5462 		dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5463 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5464 		dst[0] = ((const float complex*)VA)[k];
5465 		++k;
5466 	}
5467 	if(nnz)++K;	/* if nnz == 0 then K = 0 */
5468 	bindx[K] = 0;	// the first element off the working bindx should be set to a safe value
5469 	return RSB_ERR_NO_ERROR;	/* FIXME ! */
5470 }
5471 	/*	type double complex, storage BCOR	*/
5472 	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
5473 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
5474 {
5475 	double complex * dst = mtxAp->VA;
5476 	k = mI = MI = 0;K = 0;
5477 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5478 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5479 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5480 	RSB_ASSERT( blockrows && blockcolumns);
5481 #else
5482 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
5483 	blockcolumns = mpntr[mI+1] - mpntr[mI];
5484 #endif
5485 
5486 	while( MIndx[k] >= Mpntr[MI+1] )++MI;	/* skipping preceding block rows .. */
5487 	while( mIndx[k] >= mpntr[mI+1] )++mI;	/* skipping preceding block columns .. */
5488 	baserow = Mpntr[MI];
5489 	basecolumn = mpntr[mI];
5490 	bindx [ K ] = mI;			/* a new block */
5491 	indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;	/* FIXME : DUPLICATION ?! see later */
5492 
5493 
5494 
5495 	if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5496 	{
5497 		//RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5498 	}
5499 
5500 while(RSB_LIKELY(k<nnz))
5501 	{
5502 #ifdef DEBUG
5503 		if( MIndx[k] < baserow  )
5504 		{
5505 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5506 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5507 			errval = RSB_ERR_INTERNAL_ERROR;
5508 			goto err;/* NOTE : this jump could be evil */
5509 		}
5510 #endif
5511 
5512 		if( mIndx[k] >= basecolumn+blockcolumns  )
5513 		{
5514 			/* new block column, for sure */
5515 
5516 			while( mIndx[k] >= mpntr[mI+1] )++mI;
5517 			blockcolumns = mpntr[mI+1] - mpntr[mI];
5518 			basecolumn = mpntr[mI];
5519 
5520 			if( MIndx[k] >= baserow+blockrows  )
5521 			{
5522 				/* new block row  */
5523 
5524 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
5525 				blockrows    = Mpntr[MI+1] - Mpntr[MI];
5526 				baserow = Mpntr[MI];
5527 			}
5528 			else
5529 			{
5530 				/* same block row  */
5531 			}
5532 			++K;
5533 			bindx [ K ] = mI;			/* a new block */
5534 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5535 		}
5536 		else
5537 		if( MIndx[k] >= baserow+blockrows  )
5538 		{
5539 			/* new row block, for sure */
5540 
5541 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
5542 				blockrows    = Mpntr[MI+1] - Mpntr[MI];
5543 				baserow = Mpntr[MI];
5544 
5545 			if( mIndx[k] < basecolumn  )
5546 			{
5547 				/* new row block, new block column  */
5548 				mI = 0;
5549 
5550 				while( mIndx[k] >= mpntr[mI+1] )++mI;
5551 				blockcolumns = mpntr[mI+1] - mpntr[mI];
5552 				basecolumn = mpntr[mI];
5553 			}
5554 			else
5555 			{
5556 				/* new row block, same column  */
5557 			}
5558 			++K;
5559 			bindx [ K ] = mI;			/* a new block */
5560 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5561 		}
5562 		else
5563 		{
5564 			/* same block row for sure */
5565 		}
5566 		dst =  mtxAp->VA;
5567                 RSB_DEBUG_ASSERT(mI>=0);
5568                 RSB_DEBUG_ASSERT(MI>=0);
5569 
5570 
5571 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5572 		dst +=
5573 	indptr[ K ]
5574 	/*K * blockrows * blockcolumns*/
5575 	/*RSB_BLOCK_OFFSET(mtxAp,K)/mtxAp->el_size*/ /* FIXME : unfinished ! */
5576 ;
5577 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5578 		dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5579 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5580 		dst[0] = ((const double complex*)VA)[k];
5581 		++k;
5582 	}
5583 	if(nnz)++K;	/* if nnz == 0 then K = 0 */
5584 	bindx[K] = 0;	// the first element off the working bindx should be set to a safe value
5585 	return RSB_ERR_NO_ERROR;	/* FIXME ! */
5586 }
5587 	/*	type double, storage BCSR	*/
5588 	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
5589 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
5590 {
5591 	double * dst = mtxAp->VA;
5592 	k = mI = MI = 0;K = 0;
5593 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5594 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5595 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5596 	RSB_ASSERT( blockrows && blockcolumns);
5597 #else
5598 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
5599 	blockcolumns = mpntr[mI+1] - mpntr[mI];
5600 #endif
5601 
5602 	while( MIndx[k] >= (blockrows   *(MI+1)) )++MI;	/* skipping preceding block rows .. */
5603 	while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI;	/* skipping preceding block columns .. */
5604 	baserow = (blockrows   *(MI));
5605 	basecolumn = (blockcolumns*(mI));
5606 	bindx [ K ] = mI;			/* a new block */
5607 	indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;	/* FIXME : DUPLICATION ?! see later */
5608 
5609 
5610 
5611 	if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5612 	{
5613 		//RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5614 	}
5615 
5616 while(RSB_LIKELY(k<nnz))
5617 	{
5618 #ifdef DEBUG
5619 		if( MIndx[k] < baserow  )
5620 		{
5621 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5622 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5623 			errval = RSB_ERR_INTERNAL_ERROR;
5624 			goto err;/* NOTE : this jump could be evil */
5625 		}
5626 #endif
5627 
5628 		if( mIndx[k] >= basecolumn+blockcolumns  )
5629 		{
5630 			/* new block column, for sure */
5631 			mI = mIndx[k]/blockcolumns;
5632 			basecolumn = (blockcolumns*(mI));
5633 
5634 			if( MIndx[k] >= baserow+blockrows  )
5635 			{
5636 				/* new block row  */
5637 				MI = MIndx[k]/blockrows;
5638 				baserow = (blockrows   *(MI));
5639 			}
5640 			else
5641 			{
5642 				/* same block row  */
5643 			}
5644 			++K;
5645 			bindx [ K ] = mI;			/* a new block */
5646 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5647 		}
5648 		else
5649 		if( MIndx[k] >= baserow+blockrows  )
5650 		{
5651 			/* new row block, for sure */
5652 				MI = MIndx[k]/blockrows;
5653 				baserow = (blockrows   *(MI));
5654 
5655 			if( mIndx[k] < basecolumn  )
5656 			{
5657 				/* new row block, new block column  */
5658 				mI = 0;
5659 				mI = mIndx[k]/blockcolumns;
5660 				basecolumn = (blockcolumns*(mI));
5661 			}
5662 			else
5663 			{
5664 				/* new row block, same column  */
5665 			}
5666 			++K;
5667 			bindx [ K ] = mI;			/* a new block */
5668 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5669 		}
5670 		else
5671 		{
5672 			/* same block row for sure */
5673 		}
5674 		dst =  mtxAp->VA;
5675                 RSB_DEBUG_ASSERT(mI>=0);
5676                 RSB_DEBUG_ASSERT(MI>=0);
5677 
5678 
5679 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5680 		dst +=
5681 	K * blockrows * blockcolumns
5682 ;
5683 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5684 		dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5685 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5686 		dst[0] = ((const double*)VA)[k];
5687 		++k;
5688 	}
5689 	if(nnz)++K;	/* if nnz == 0 then K = 0 */
5690 	bindx[K] = 0;	// the first element off the working bindx should be set to a safe value
5691 	return RSB_ERR_NO_ERROR;	/* FIXME ! */
5692 }
5693 	/*	type float, storage BCSR	*/
5694 	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT  )
5695 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
5696 {
5697 	float * dst = mtxAp->VA;
5698 	k = mI = MI = 0;K = 0;
5699 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5700 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5701 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5702 	RSB_ASSERT( blockrows && blockcolumns);
5703 #else
5704 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
5705 	blockcolumns = mpntr[mI+1] - mpntr[mI];
5706 #endif
5707 
5708 	while( MIndx[k] >= (blockrows   *(MI+1)) )++MI;	/* skipping preceding block rows .. */
5709 	while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI;	/* skipping preceding block columns .. */
5710 	baserow = (blockrows   *(MI));
5711 	basecolumn = (blockcolumns*(mI));
5712 	bindx [ K ] = mI;			/* a new block */
5713 	indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;	/* FIXME : DUPLICATION ?! see later */
5714 
5715 
5716 
5717 	if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5718 	{
5719 		//RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5720 	}
5721 
5722 while(RSB_LIKELY(k<nnz))
5723 	{
5724 #ifdef DEBUG
5725 		if( MIndx[k] < baserow  )
5726 		{
5727 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5728 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5729 			errval = RSB_ERR_INTERNAL_ERROR;
5730 			goto err;/* NOTE : this jump could be evil */
5731 		}
5732 #endif
5733 
5734 		if( mIndx[k] >= basecolumn+blockcolumns  )
5735 		{
5736 			/* new block column, for sure */
5737 			mI = mIndx[k]/blockcolumns;
5738 			basecolumn = (blockcolumns*(mI));
5739 
5740 			if( MIndx[k] >= baserow+blockrows  )
5741 			{
5742 				/* new block row  */
5743 				MI = MIndx[k]/blockrows;
5744 				baserow = (blockrows   *(MI));
5745 			}
5746 			else
5747 			{
5748 				/* same block row  */
5749 			}
5750 			++K;
5751 			bindx [ K ] = mI;			/* a new block */
5752 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5753 		}
5754 		else
5755 		if( MIndx[k] >= baserow+blockrows  )
5756 		{
5757 			/* new row block, for sure */
5758 				MI = MIndx[k]/blockrows;
5759 				baserow = (blockrows   *(MI));
5760 
5761 			if( mIndx[k] < basecolumn  )
5762 			{
5763 				/* new row block, new block column  */
5764 				mI = 0;
5765 				mI = mIndx[k]/blockcolumns;
5766 				basecolumn = (blockcolumns*(mI));
5767 			}
5768 			else
5769 			{
5770 				/* new row block, same column  */
5771 			}
5772 			++K;
5773 			bindx [ K ] = mI;			/* a new block */
5774 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5775 		}
5776 		else
5777 		{
5778 			/* same block row for sure */
5779 		}
5780 		dst =  mtxAp->VA;
5781                 RSB_DEBUG_ASSERT(mI>=0);
5782                 RSB_DEBUG_ASSERT(MI>=0);
5783 
5784 
5785 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5786 		dst +=
5787 	K * blockrows * blockcolumns
5788 ;
5789 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5790 		dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5791 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5792 		dst[0] = ((const float*)VA)[k];
5793 		++k;
5794 	}
5795 	if(nnz)++K;	/* if nnz == 0 then K = 0 */
5796 	bindx[K] = 0;	// the first element off the working bindx should be set to a safe value
5797 	return RSB_ERR_NO_ERROR;	/* FIXME ! */
5798 }
5799 	/*	type float complex, storage BCSR	*/
5800 	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
5801 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
5802 {
5803 	float complex * dst = mtxAp->VA;
5804 	k = mI = MI = 0;K = 0;
5805 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5806 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5807 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5808 	RSB_ASSERT( blockrows && blockcolumns);
5809 #else
5810 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
5811 	blockcolumns = mpntr[mI+1] - mpntr[mI];
5812 #endif
5813 
5814 	while( MIndx[k] >= (blockrows   *(MI+1)) )++MI;	/* skipping preceding block rows .. */
5815 	while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI;	/* skipping preceding block columns .. */
5816 	baserow = (blockrows   *(MI));
5817 	basecolumn = (blockcolumns*(mI));
5818 	bindx [ K ] = mI;			/* a new block */
5819 	indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;	/* FIXME : DUPLICATION ?! see later */
5820 
5821 
5822 
5823 	if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5824 	{
5825 		//RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5826 	}
5827 
5828 while(RSB_LIKELY(k<nnz))
5829 	{
5830 #ifdef DEBUG
5831 		if( MIndx[k] < baserow  )
5832 		{
5833 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5834 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5835 			errval = RSB_ERR_INTERNAL_ERROR;
5836 			goto err;/* NOTE : this jump could be evil */
5837 		}
5838 #endif
5839 
5840 		if( mIndx[k] >= basecolumn+blockcolumns  )
5841 		{
5842 			/* new block column, for sure */
5843 			mI = mIndx[k]/blockcolumns;
5844 			basecolumn = (blockcolumns*(mI));
5845 
5846 			if( MIndx[k] >= baserow+blockrows  )
5847 			{
5848 				/* new block row  */
5849 				MI = MIndx[k]/blockrows;
5850 				baserow = (blockrows   *(MI));
5851 			}
5852 			else
5853 			{
5854 				/* same block row  */
5855 			}
5856 			++K;
5857 			bindx [ K ] = mI;			/* a new block */
5858 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5859 		}
5860 		else
5861 		if( MIndx[k] >= baserow+blockrows  )
5862 		{
5863 			/* new row block, for sure */
5864 				MI = MIndx[k]/blockrows;
5865 				baserow = (blockrows   *(MI));
5866 
5867 			if( mIndx[k] < basecolumn  )
5868 			{
5869 				/* new row block, new block column  */
5870 				mI = 0;
5871 				mI = mIndx[k]/blockcolumns;
5872 				basecolumn = (blockcolumns*(mI));
5873 			}
5874 			else
5875 			{
5876 				/* new row block, same column  */
5877 			}
5878 			++K;
5879 			bindx [ K ] = mI;			/* a new block */
5880 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5881 		}
5882 		else
5883 		{
5884 			/* same block row for sure */
5885 		}
5886 		dst =  mtxAp->VA;
5887                 RSB_DEBUG_ASSERT(mI>=0);
5888                 RSB_DEBUG_ASSERT(MI>=0);
5889 
5890 
5891 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5892 		dst +=
5893 	K * blockrows * blockcolumns
5894 ;
5895 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5896 		dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5897 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5898 		dst[0] = ((const float complex*)VA)[k];
5899 		++k;
5900 	}
5901 	if(nnz)++K;	/* if nnz == 0 then K = 0 */
5902 	bindx[K] = 0;	// the first element off the working bindx should be set to a safe value
5903 	return RSB_ERR_NO_ERROR;	/* FIXME ! */
5904 }
5905 	/*	type double complex, storage BCSR	*/
5906 	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
5907 	if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
5908 {
5909 	double complex * dst = mtxAp->VA;
5910 	k = mI = MI = 0;K = 0;
5911 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5912 /*	rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5913 	rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5914 	RSB_ASSERT( blockrows && blockcolumns);
5915 #else
5916 	blockrows    = Mpntr[MI+1] - Mpntr[MI];
5917 	blockcolumns = mpntr[mI+1] - mpntr[mI];
5918 #endif
5919 
5920 	while( MIndx[k] >= (blockrows   *(MI+1)) )++MI;	/* skipping preceding block rows .. */
5921 	while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI;	/* skipping preceding block columns .. */
5922 	baserow = (blockrows   *(MI));
5923 	basecolumn = (blockcolumns*(mI));
5924 	bindx [ K ] = mI;			/* a new block */
5925 	indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;	/* FIXME : DUPLICATION ?! see later */
5926 
5927 
5928 
5929 	if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5930 	{
5931 		//RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5932 	}
5933 
5934 while(RSB_LIKELY(k<nnz))
5935 	{
5936 #ifdef DEBUG
5937 		if( MIndx[k] < baserow  )
5938 		{
5939 			RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5940 			RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5941 			errval = RSB_ERR_INTERNAL_ERROR;
5942 			goto err;/* NOTE : this jump could be evil */
5943 		}
5944 #endif
5945 
5946 		if( mIndx[k] >= basecolumn+blockcolumns  )
5947 		{
5948 			/* new block column, for sure */
5949 			mI = mIndx[k]/blockcolumns;
5950 			basecolumn = (blockcolumns*(mI));
5951 
5952 			if( MIndx[k] >= baserow+blockrows  )
5953 			{
5954 				/* new block row  */
5955 				MI = MIndx[k]/blockrows;
5956 				baserow = (blockrows   *(MI));
5957 			}
5958 			else
5959 			{
5960 				/* same block row  */
5961 			}
5962 			++K;
5963 			bindx [ K ] = mI;			/* a new block */
5964 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5965 		}
5966 		else
5967 		if( MIndx[k] >= baserow+blockrows  )
5968 		{
5969 			/* new row block, for sure */
5970 				MI = MIndx[k]/blockrows;
5971 				baserow = (blockrows   *(MI));
5972 
5973 			if( mIndx[k] < basecolumn  )
5974 			{
5975 				/* new row block, new block column  */
5976 				mI = 0;
5977 				mI = mIndx[k]/blockcolumns;
5978 				basecolumn = (blockcolumns*(mI));
5979 			}
5980 			else
5981 			{
5982 				/* new row block, same column  */
5983 			}
5984 			++K;
5985 			bindx [ K ] = mI;			/* a new block */
5986 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
5987 		}
5988 		else
5989 		{
5990 			/* same block row for sure */
5991 		}
5992 		dst =  mtxAp->VA;
5993                 RSB_DEBUG_ASSERT(mI>=0);
5994                 RSB_DEBUG_ASSERT(MI>=0);
5995 
5996 
5997 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5998 		dst +=
5999 	K * blockrows * blockcolumns
6000 ;
6001 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
6002 		dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
6003 		RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
6004 		dst[0] = ((const double complex*)VA)[k];
6005 		++k;
6006 	}
6007 	if(nnz)++K;	/* if nnz == 0 then K = 0 */
6008 	bindx[K] = 0;	// the first element off the working bindx should be set to a safe value
6009 	return RSB_ERR_NO_ERROR;	/* FIXME ! */
6010 }
6011 	errval = RSB_ERR_INTERNAL_ERROR;
6012 	return errval;
6013 }
6014 
rsb__dump_block(rsb_type_t type,const void * VA,rsb_blk_idx_t roff,rsb_blk_idx_t coff,rsb_blk_idx_t rows,rsb_blk_idx_t cols)6015 rsb_err_t rsb__dump_block(rsb_type_t type, const void * VA, rsb_blk_idx_t roff, rsb_blk_idx_t coff, rsb_blk_idx_t rows, rsb_blk_idx_t cols )
6016 {
6017 	/*!
6018 	 * Will dump to stdout a dense matrix.
6019 	 * Used for debugging purposes.
6020 	 *
6021 	 * FIXME : should be integrated with the macro subsystem in util.m4, and support column major order, and debugged.
6022 	 */
6023 #if RSB_ALLOW_STDOUT
6024 	register rsb_coo_idx_t i, j;
6025 
6026 	if(RSB_BLK_MUL_OVERFLOW(rows,cols))
6027 		return RSB_ERR_LIMITS;
6028 
6029 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6030 	if(type == RSB_NUMERICAL_TYPE_DOUBLE )
6031 	{
6032 		for(i=0;i<rows;++i)for(j=0;j<cols;++j)
6033 		if(((double*)VA)[cols*i+j]!=((double)(0)) )
6034 		{ RSB_STDOUT(""
6035 		"%zd"/* FIXME : this could be any index type! */
6036 		"\t"
6037 		"%zd"
6038 		"\t"
6039 		RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING
6040 		"\n",(rsb_printf_int_t)(roff+i+1),(rsb_printf_int_t)(coff+j+1),
6041 ((double*)VA)[cols*i+j]);
6042 		}
6043 		return RSB_ERR_NO_ERROR;
6044 	}
6045 #endif
6046 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6047 	if(type == RSB_NUMERICAL_TYPE_FLOAT )
6048 	{
6049 		for(i=0;i<rows;++i)for(j=0;j<cols;++j)
6050 		if(((float*)VA)[cols*i+j]!=((float)(0)) )
6051 		{ RSB_STDOUT(""
6052 		"%zd"/* FIXME : this could be any index type! */
6053 		"\t"
6054 		"%zd"
6055 		"\t"
6056 		RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING
6057 		"\n",(rsb_printf_int_t)(roff+i+1),(rsb_printf_int_t)(coff+j+1),
6058 ((float*)VA)[cols*i+j]);
6059 		}
6060 		return RSB_ERR_NO_ERROR;
6061 	}
6062 #endif
6063 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6064 	if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6065 	{
6066 		for(i=0;i<rows;++i)for(j=0;j<cols;++j)
6067 		if(((float complex*)VA)[cols*i+j]!=((float complex)(0)) )
6068 		{ RSB_STDOUT(""
6069 		"%zd"/* FIXME : this could be any index type! */
6070 		"\t"
6071 		"%zd"
6072 		"\t"
6073 		RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING
6074 		"\n",(rsb_printf_int_t)(roff+i+1),(rsb_printf_int_t)(coff+j+1),
6075 crealf(((float complex*)VA)[cols*i+j]),cimagf(((float complex*)VA)[cols*i+j]));
6076 		}
6077 		return RSB_ERR_NO_ERROR;
6078 	}
6079 #endif
6080 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6081 	if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6082 	{
6083 		for(i=0;i<rows;++i)for(j=0;j<cols;++j)
6084 		if(((double complex*)VA)[cols*i+j]!=((double complex)(0)) )
6085 		{ RSB_STDOUT(""
6086 		"%zd"/* FIXME : this could be any index type! */
6087 		"\t"
6088 		"%zd"
6089 		"\t"
6090 		RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING
6091 		"\n",(rsb_printf_int_t)(roff+i+1),(rsb_printf_int_t)(coff+j+1),
6092 creal(((double complex*)VA)[cols*i+j]),cimag(((double complex*)VA)[cols*i+j]));
6093 		}
6094 		return RSB_ERR_NO_ERROR;
6095 	}
6096 #endif
6097 	return RSB_ERR_UNSUPPORTED_TYPE	;
6098 #else
6099 	return RSB_ERR_UNSUPPORTED_FEATURE;
6100 #endif
6101 }
6102 
rsb__dump_blocks(const struct rsb_mtx_t * mtxAp)6103 rsb_err_t rsb__dump_blocks(const struct rsb_mtx_t *mtxAp)
6104 {
6105 	return RSB_ERR_UNIMPLEMENTED_YET;
6106 #if 0
6107 	/*!
6108 	 * \ingroup gr_internals
6109 	 * A debug function for printing out the matrix structure.
6110 	 *
6111 	 * FIXME : UNFINISHED
6112 	 * Note : it is extremely slow.
6113 	 **/
6114 	rsb_blk_idx_t i,j;
6115 	if(!mtxAp)return RSB_ERR_BADARGS;
6116 	if(!mtxAp->options)return RSB_ERR_BADARGS;
6117 
6118 	RSB_STDERR("\t block structure :\n");
6119 
6120 	/* this prints out the matrix blocks nnz structure */
6121 	for(i=0;i<mtxAp->M_b;++i)
6122 	{
6123 		for(j=0;j<mtxAp->K_b;++j)
6124 		if((RSB_BITMAP_GET(mtxAp->options->bitmap,mtxAp->M_b,mtxAp->K_b,i,j)))
6125 		{
6126 			RSB_STDERR("1");
6127 		}
6128 		else
6129 		{
6130 			RSB_STDERR("0");
6131 		}
6132 		RSB_STDERR("\n");
6133 	}
6134 	return RSB_ERR_NO_ERROR;
6135 #endif
6136 }
6137 
rsb__test_print_csr(rsb_type_t type,rsb_flags_t flags,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const void * VA,rsb_coo_idx_t rows,rsb_coo_idx_t cols,rsb_nnz_idx_t nnz,rsb_bool_t want_header,FILE * stream)6138 rsb_err_t rsb__test_print_csr(rsb_type_t type, rsb_flags_t flags, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const void * VA, rsb_coo_idx_t rows, rsb_coo_idx_t cols, rsb_nnz_idx_t nnz, rsb_bool_t want_header, FILE*stream)
6139 {
6140 	/**
6141 	 * \ingroup gr_internals
6142 	 * Dumps out a whole matrix, from its CSR representation.
6143 	 *
6144 	 * Warning : the nonzeros should be sorted on input.
6145 	 */
6146 #if RSB_ALLOW_STDOUT
6147 	rsb_coo_idx_t k;
6148 	if( !stream )goto err;
6149 	if( !IA )goto err;
6150 	if( ( !JA || !VA ) && nnz>0  )goto err;
6151 
6152 	RSB_FPRINTF(stream,"%zd\n",(rsb_printf_int_t)rows);
6153 	/* RSB_FPRINTF(stream,"%zd\n",(rsb_printf_int_t) nnz); */
6154 	for(k=0;k<rows+1;++k) { RSB_FPRINTF(stream,"%zd\n",(rsb_printf_int_t)(IA[k]+1)); }
6155 	for(k=0;k<nnz   ;++k) { RSB_FPRINTF(stream,"%zd\n",(rsb_printf_int_t)(JA[k]+1)); }
6156 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6157 	if(type == RSB_NUMERICAL_TYPE_DOUBLE )
6158 	{
6159 		for(k=0;k<nnz;++k)
6160 		{
6161 			RSB_FPRINTF(stream,
6162 				RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING
6163 				"\n"
6164 				,((double*)VA)[k]);
6165 		}
6166 		return RSB_ERR_NO_ERROR;
6167 	}
6168 #endif
6169 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6170 	if(type == RSB_NUMERICAL_TYPE_FLOAT )
6171 	{
6172 		for(k=0;k<nnz;++k)
6173 		{
6174 			RSB_FPRINTF(stream,
6175 				RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING
6176 				"\n"
6177 				,((float*)VA)[k]);
6178 		}
6179 		return RSB_ERR_NO_ERROR;
6180 	}
6181 #endif
6182 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6183 	if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6184 	{
6185 		for(k=0;k<nnz;++k)
6186 		{
6187 			RSB_FPRINTF(stream,
6188 				RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING
6189 				"\n"
6190 				,crealf(((float complex*)VA)[k]),cimagf(((float complex*)VA)[k]));
6191 		}
6192 		return RSB_ERR_NO_ERROR;
6193 	}
6194 #endif
6195 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6196 	if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6197 	{
6198 		for(k=0;k<nnz;++k)
6199 		{
6200 			RSB_FPRINTF(stream,
6201 				RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING
6202 				"\n"
6203 				,creal(((double complex*)VA)[k]),cimag(((double complex*)VA)[k]));
6204 		}
6205 		return RSB_ERR_NO_ERROR;
6206 	}
6207 #endif
6208 err:
6209 	return RSB_ERR_GENERIC_ERROR;
6210 #else
6211 	return RSB_ERR_UNSUPPORTED_FEATURE;
6212 #endif
6213 }
6214 
rsb__test_print_coo_mm(rsb_type_t type,rsb_flags_t flags,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const void * VA,rsb_coo_idx_t rows,rsb_coo_idx_t cols,rsb_nnz_idx_t nnz,rsb_bool_t want_header,FILE * stream)6215 rsb_err_t rsb__test_print_coo_mm(rsb_type_t type, rsb_flags_t flags, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const void * VA, rsb_coo_idx_t rows, rsb_coo_idx_t cols, rsb_nnz_idx_t nnz, rsb_bool_t want_header, FILE*stream)
6216 {
6217 	/**
6218 	 * \ingroup gr_internals
6219 	 * Dumps out a whole matrix, from its coordinates, in matrix market format.
6220 	 *
6221 	 * Warning : the nonzeros should be sorted on input.
6222 	 */
6223 #if RSB_ALLOW_STDOUT
6224 	rsb_coo_idx_t k;
6225 	const char * ts = RSB_IS_MATRIX_TYPE_COMPLEX(type)?"complex":"real";
6226 	const char * ss = RSB_SYMMETRY_STRING(flags);
6227 
6228 	if( !stream )
6229 	{
6230 		goto err;
6231 	}
6232 
6233 	if( ( !IA || !JA || !VA ) && nnz > 0 )
6234 		goto err;
6235 	if( rows < 0 || cols < 0 || nnz < 0 )
6236 		goto err;
6237 
6238 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6239 	if(type == RSB_NUMERICAL_TYPE_DOUBLE )
6240 	{
6241 		if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix coordinate %s %s\n%zd %zd %zd\n",ts,ss,(rsb_printf_int_t)rows,(rsb_printf_int_t)cols,(rsb_printf_int_t)nnz);
6242 /*		for(k=0;k<nnz;++k) { RSB_FPRINTF(stream,"%6zd %6zd %20g\n",(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]); }*/
6243 		for(k=0;k<nnz;++k)
6244 		{
6245 			RSB_FPRINTF(stream,
6246 				"%zd"
6247 				"\t"
6248 				"%zd"
6249 				"\t"
6250 				RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING
6251 				"\n"
6252 				,(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((double*)VA)[k]);
6253 		}
6254 		return RSB_ERR_NO_ERROR;
6255 	}
6256 #endif
6257 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6258 	if(type == RSB_NUMERICAL_TYPE_FLOAT )
6259 	{
6260 		if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix coordinate %s %s\n%zd %zd %zd\n",ts,ss,(rsb_printf_int_t)rows,(rsb_printf_int_t)cols,(rsb_printf_int_t)nnz);
6261 /*		for(k=0;k<nnz;++k) { RSB_FPRINTF(stream,"%6zd %6zd %20g\n",(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]); }*/
6262 		for(k=0;k<nnz;++k)
6263 		{
6264 			RSB_FPRINTF(stream,
6265 				"%zd"
6266 				"\t"
6267 				"%zd"
6268 				"\t"
6269 				RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING
6270 				"\n"
6271 				,(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]);
6272 		}
6273 		return RSB_ERR_NO_ERROR;
6274 	}
6275 #endif
6276 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6277 	if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6278 	{
6279 		if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix coordinate %s %s\n%zd %zd %zd\n",ts,ss,(rsb_printf_int_t)rows,(rsb_printf_int_t)cols,(rsb_printf_int_t)nnz);
6280 /*		for(k=0;k<nnz;++k) { RSB_FPRINTF(stream,"%6zd %6zd %20g\n",(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]); }*/
6281 		for(k=0;k<nnz;++k)
6282 		{
6283 			RSB_FPRINTF(stream,
6284 				"%zd"
6285 				"\t"
6286 				"%zd"
6287 				"\t"
6288 				RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING
6289 				"\n"
6290 				,(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),crealf(((float complex*)VA)[k]),cimagf(((float complex*)VA)[k]));
6291 		}
6292 		return RSB_ERR_NO_ERROR;
6293 	}
6294 #endif
6295 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6296 	if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6297 	{
6298 		if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix coordinate %s %s\n%zd %zd %zd\n",ts,ss,(rsb_printf_int_t)rows,(rsb_printf_int_t)cols,(rsb_printf_int_t)nnz);
6299 /*		for(k=0;k<nnz;++k) { RSB_FPRINTF(stream,"%6zd %6zd %20g\n",(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]); }*/
6300 		for(k=0;k<nnz;++k)
6301 		{
6302 			RSB_FPRINTF(stream,
6303 				"%zd"
6304 				"\t"
6305 				"%zd"
6306 				"\t"
6307 				RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING
6308 				"\n"
6309 				,(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),creal(((double complex*)VA)[k]),cimag(((double complex*)VA)[k]));
6310 		}
6311 		return RSB_ERR_NO_ERROR;
6312 	}
6313 #endif
6314 err:
6315 	return RSB_ERR_GENERIC_ERROR;
6316 #else
6317 	return RSB_ERR_UNSUPPORTED_FEATURE;
6318 #endif
6319 }
6320 
rsb__do_sizeof(rsb_type_t type)6321 /*static*/ /*inline*/ size_t rsb__do_sizeof(rsb_type_t type)	{
6322 		/*
6323 		 * FIXME : UNUSED ?
6324 		 */
6325 		size_t so = 0;
6326 		switch(type)
6327 		{
6328 			/* supported (double,float,float complex,double complex) */
6329 			case RSB_NUMERICAL_TYPE_DOUBLE 	:
6330 				so = sizeof(double);
6331 			break;
6332 			case RSB_NUMERICAL_TYPE_FLOAT 	:
6333 				so = sizeof(float);
6334 			break;
6335 			case RSB_NUMERICAL_TYPE_FLOAT_COMPLEX 	:
6336 				so = sizeof(float complex);
6337 			break;
6338 			case RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX 	:
6339 				so = sizeof(double complex);
6340 			break;
6341 			/* unsupported type */
6342 			default :
6343 			RSB_NULL_STATEMENT_FOR_COMPILER_HAPPINESS
6344 		}
6345 		return so;
6346 	}
6347 
rsb__do_coo_sum(struct rsb_coo_matrix_t * coocp,const void * alphap,const struct rsb_coo_matrix_t * cooap,const void * betap,const struct rsb_coo_matrix_t * coobp)6348 rsb_err_t rsb__do_coo_sum( struct rsb_coo_matrix_t*coocp, const void *alphap, const struct rsb_coo_matrix_t*cooap, const void *betap,  const struct rsb_coo_matrix_t*coobp)
6349 {
6350 	struct rsb_coo_matrix_t cooa = *cooap, coob = *coobp, cooc = *coocp;
6351 	rsb_nnz_idx_t /*rnz = 0,*/an, bn, cn;
6352 
6353 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6354 	if(cooa.typecode == RSB_NUMERICAL_TYPE_DOUBLE )
6355 	{
6356 	double alpha = alphap?*(double*)alphap:((double)(1.0));
6357 	double beta  = betap ?*(double*)betap :((double)(1.0));
6358 	for(cn = 0, an = 0, bn = 0;an<cooa.nnz || bn<coob.nnz;)
6359 	{
6360 		rsb_nnz_idx_t ap = an, bp = bn;
6361 		if(cooa.IA[an]==coob.IA[bn] && cooa.JA[an]==coob.JA[bn])
6362 			cooc.IA[cn] = cooa.IA[an],cooc.JA[cn] = cooa.JA[an],
6363 			((double*)cooc.VA)[cn] = alpha * ((double*)cooa.VA)[an] + beta * ((double*)coob.VA)[bn],
6364 			ap = an, bp = bn, ++cn, ++an, ++bn;
6365 
6366 		for(;an<cooa.nnz && cooa.IA[an]==cooa.IA[ap] && cooa.JA[an]==cooa.JA[ap] ;++an)
6367 			//RSB_STDOUT("x> %d %d\n",cooa.IA[an],cooa.JA[an])
6368 			((double*)cooc.VA)[cn] += alpha * ((double*)cooa.VA)[an];
6369 
6370 		for(;bn<coob.nnz && coob.IA[bn]==coob.IA[bp] && coob.JA[bn]==coob.JA[bp] ;++bn)
6371 			//RSB_STDOUT("x> %d %d\n",coob.IA[bn],coob.JA[bn])
6372 			((double*)cooc.VA)[cn] += beta  * ((double*)coob.VA)[bn];
6373 
6374 		if( bn<coob.nnz )
6375 		for(;an<cooa.nnz && (cooa.IA[an]<coob.IA[bn] ||
6376 			       	(cooa.IA[an] <= coob.IA[bn] && cooa.JA[an]<coob.JA[bn]))
6377 			       	;++an)
6378 				//RSB_STDOUT("-> %d %d\n",cooa.IA[an],cooa.JA[an]),
6379 			cooc.IA[cn] = cooa.IA[an], cooc.JA[cn] = cooa.JA[an],
6380 			((double*)cooc.VA)[cn] = alpha * ((double*)cooa.VA)[an],
6381 			++cn;
6382 
6383 		if( an<cooa.nnz )
6384 		for(;bn<coob.nnz && (cooa.IA[an]>coob.IA[bn] ||
6385 			       	(cooa.IA[an]>=coob.IA[bn] && cooa.JA[an]>coob.JA[bn]))
6386 			       	;++bn)
6387 			//	RSB_STDOUT("-> %d %d\n",coob.IA[bn],coob.JA[bn]),
6388 			cooc.IA[cn] = coob.IA[bn],cooc.JA[cn] = coob.JA[bn],
6389 			((double*)cooc.VA)[cn] = beta * ((double*)coob.VA)[bn],
6390 			++cn;
6391 		//RSB_STDOUT("? %d %d\n",an,bn);
6392 	}
6393 	}
6394 	else
6395 #endif
6396 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6397 	if(cooa.typecode == RSB_NUMERICAL_TYPE_FLOAT )
6398 	{
6399 	float alpha = alphap?*(float*)alphap:((float)(1.0));
6400 	float beta  = betap ?*(float*)betap :((float)(1.0));
6401 	for(cn = 0, an = 0, bn = 0;an<cooa.nnz || bn<coob.nnz;)
6402 	{
6403 		rsb_nnz_idx_t ap = an, bp = bn;
6404 		if(cooa.IA[an]==coob.IA[bn] && cooa.JA[an]==coob.JA[bn])
6405 			cooc.IA[cn] = cooa.IA[an],cooc.JA[cn] = cooa.JA[an],
6406 			((float*)cooc.VA)[cn] = alpha * ((float*)cooa.VA)[an] + beta * ((float*)coob.VA)[bn],
6407 			ap = an, bp = bn, ++cn, ++an, ++bn;
6408 
6409 		for(;an<cooa.nnz && cooa.IA[an]==cooa.IA[ap] && cooa.JA[an]==cooa.JA[ap] ;++an)
6410 			//RSB_STDOUT("x> %d %d\n",cooa.IA[an],cooa.JA[an])
6411 			((float*)cooc.VA)[cn] += alpha * ((float*)cooa.VA)[an];
6412 
6413 		for(;bn<coob.nnz && coob.IA[bn]==coob.IA[bp] && coob.JA[bn]==coob.JA[bp] ;++bn)
6414 			//RSB_STDOUT("x> %d %d\n",coob.IA[bn],coob.JA[bn])
6415 			((float*)cooc.VA)[cn] += beta  * ((float*)coob.VA)[bn];
6416 
6417 		if( bn<coob.nnz )
6418 		for(;an<cooa.nnz && (cooa.IA[an]<coob.IA[bn] ||
6419 			       	(cooa.IA[an] <= coob.IA[bn] && cooa.JA[an]<coob.JA[bn]))
6420 			       	;++an)
6421 				//RSB_STDOUT("-> %d %d\n",cooa.IA[an],cooa.JA[an]),
6422 			cooc.IA[cn] = cooa.IA[an], cooc.JA[cn] = cooa.JA[an],
6423 			((float*)cooc.VA)[cn] = alpha * ((float*)cooa.VA)[an],
6424 			++cn;
6425 
6426 		if( an<cooa.nnz )
6427 		for(;bn<coob.nnz && (cooa.IA[an]>coob.IA[bn] ||
6428 			       	(cooa.IA[an]>=coob.IA[bn] && cooa.JA[an]>coob.JA[bn]))
6429 			       	;++bn)
6430 			//	RSB_STDOUT("-> %d %d\n",coob.IA[bn],coob.JA[bn]),
6431 			cooc.IA[cn] = coob.IA[bn],cooc.JA[cn] = coob.JA[bn],
6432 			((float*)cooc.VA)[cn] = beta * ((float*)coob.VA)[bn],
6433 			++cn;
6434 		//RSB_STDOUT("? %d %d\n",an,bn);
6435 	}
6436 	}
6437 	else
6438 #endif
6439 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6440 	if(cooa.typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6441 	{
6442 	float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
6443 	float complex beta  = betap ?*(float complex*)betap :((float complex)(1.0));
6444 	for(cn = 0, an = 0, bn = 0;an<cooa.nnz || bn<coob.nnz;)
6445 	{
6446 		rsb_nnz_idx_t ap = an, bp = bn;
6447 		if(cooa.IA[an]==coob.IA[bn] && cooa.JA[an]==coob.JA[bn])
6448 			cooc.IA[cn] = cooa.IA[an],cooc.JA[cn] = cooa.JA[an],
6449 			((float complex*)cooc.VA)[cn] = alpha * ((float complex*)cooa.VA)[an] + beta * ((float complex*)coob.VA)[bn],
6450 			ap = an, bp = bn, ++cn, ++an, ++bn;
6451 
6452 		for(;an<cooa.nnz && cooa.IA[an]==cooa.IA[ap] && cooa.JA[an]==cooa.JA[ap] ;++an)
6453 			//RSB_STDOUT("x> %d %d\n",cooa.IA[an],cooa.JA[an])
6454 			((float complex*)cooc.VA)[cn] += alpha * ((float complex*)cooa.VA)[an];
6455 
6456 		for(;bn<coob.nnz && coob.IA[bn]==coob.IA[bp] && coob.JA[bn]==coob.JA[bp] ;++bn)
6457 			//RSB_STDOUT("x> %d %d\n",coob.IA[bn],coob.JA[bn])
6458 			((float complex*)cooc.VA)[cn] += beta  * ((float complex*)coob.VA)[bn];
6459 
6460 		if( bn<coob.nnz )
6461 		for(;an<cooa.nnz && (cooa.IA[an]<coob.IA[bn] ||
6462 			       	(cooa.IA[an] <= coob.IA[bn] && cooa.JA[an]<coob.JA[bn]))
6463 			       	;++an)
6464 				//RSB_STDOUT("-> %d %d\n",cooa.IA[an],cooa.JA[an]),
6465 			cooc.IA[cn] = cooa.IA[an], cooc.JA[cn] = cooa.JA[an],
6466 			((float complex*)cooc.VA)[cn] = alpha * ((float complex*)cooa.VA)[an],
6467 			++cn;
6468 
6469 		if( an<cooa.nnz )
6470 		for(;bn<coob.nnz && (cooa.IA[an]>coob.IA[bn] ||
6471 			       	(cooa.IA[an]>=coob.IA[bn] && cooa.JA[an]>coob.JA[bn]))
6472 			       	;++bn)
6473 			//	RSB_STDOUT("-> %d %d\n",coob.IA[bn],coob.JA[bn]),
6474 			cooc.IA[cn] = coob.IA[bn],cooc.JA[cn] = coob.JA[bn],
6475 			((float complex*)cooc.VA)[cn] = beta * ((float complex*)coob.VA)[bn],
6476 			++cn;
6477 		//RSB_STDOUT("? %d %d\n",an,bn);
6478 	}
6479 	}
6480 	else
6481 #endif
6482 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6483 	if(cooa.typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6484 	{
6485 	double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
6486 	double complex beta  = betap ?*(double complex*)betap :((double complex)(1.0));
6487 	for(cn = 0, an = 0, bn = 0;an<cooa.nnz || bn<coob.nnz;)
6488 	{
6489 		rsb_nnz_idx_t ap = an, bp = bn;
6490 		if(cooa.IA[an]==coob.IA[bn] && cooa.JA[an]==coob.JA[bn])
6491 			cooc.IA[cn] = cooa.IA[an],cooc.JA[cn] = cooa.JA[an],
6492 			((double complex*)cooc.VA)[cn] = alpha * ((double complex*)cooa.VA)[an] + beta * ((double complex*)coob.VA)[bn],
6493 			ap = an, bp = bn, ++cn, ++an, ++bn;
6494 
6495 		for(;an<cooa.nnz && cooa.IA[an]==cooa.IA[ap] && cooa.JA[an]==cooa.JA[ap] ;++an)
6496 			//RSB_STDOUT("x> %d %d\n",cooa.IA[an],cooa.JA[an])
6497 			((double complex*)cooc.VA)[cn] += alpha * ((double complex*)cooa.VA)[an];
6498 
6499 		for(;bn<coob.nnz && coob.IA[bn]==coob.IA[bp] && coob.JA[bn]==coob.JA[bp] ;++bn)
6500 			//RSB_STDOUT("x> %d %d\n",coob.IA[bn],coob.JA[bn])
6501 			((double complex*)cooc.VA)[cn] += beta  * ((double complex*)coob.VA)[bn];
6502 
6503 		if( bn<coob.nnz )
6504 		for(;an<cooa.nnz && (cooa.IA[an]<coob.IA[bn] ||
6505 			       	(cooa.IA[an] <= coob.IA[bn] && cooa.JA[an]<coob.JA[bn]))
6506 			       	;++an)
6507 				//RSB_STDOUT("-> %d %d\n",cooa.IA[an],cooa.JA[an]),
6508 			cooc.IA[cn] = cooa.IA[an], cooc.JA[cn] = cooa.JA[an],
6509 			((double complex*)cooc.VA)[cn] = alpha * ((double complex*)cooa.VA)[an],
6510 			++cn;
6511 
6512 		if( an<cooa.nnz )
6513 		for(;bn<coob.nnz && (cooa.IA[an]>coob.IA[bn] ||
6514 			       	(cooa.IA[an]>=coob.IA[bn] && cooa.JA[an]>coob.JA[bn]))
6515 			       	;++bn)
6516 			//	RSB_STDOUT("-> %d %d\n",coob.IA[bn],coob.JA[bn]),
6517 			cooc.IA[cn] = coob.IA[bn],cooc.JA[cn] = coob.JA[bn],
6518 			((double complex*)cooc.VA)[cn] = beta * ((double complex*)coob.VA)[bn],
6519 			++cn;
6520 		//RSB_STDOUT("? %d %d\n",an,bn);
6521 	}
6522 	}
6523 	else
6524 #endif
6525 	return RSB_ERR_UNSUPPORTED_TYPE	;
6526 	return RSB_ERR_NO_ERROR;
6527 }
6528 
rsb__cor_merge_dups(rsb_type_t typecode,void * RSB_RESTRICT VA,rsb_coo_idx_t * RSB_RESTRICT IA,rsb_coo_idx_t * RSB_RESTRICT JA,rsb_nnz_idx_t offB,rsb_nnz_idx_t nnzB,rsb_nnz_idx_t nnzC,const int wv,int wp,rsb_nnz_idx_t * onzp,struct rsb_coo_matrix_t * RSB_RESTRICT coop)6529 rsb_err_t rsb__cor_merge_dups(rsb_type_t typecode, void* RSB_RESTRICT VA, rsb_coo_idx_t * RSB_RESTRICT IA, rsb_coo_idx_t * RSB_RESTRICT JA, rsb_nnz_idx_t offB, rsb_nnz_idx_t nnzB, rsb_nnz_idx_t nnzC, const int wv, int wp, rsb_nnz_idx_t *onzp, struct rsb_coo_matrix_t*RSB_RESTRICT coop)
6530 {
6531 	/**
6532 		See rsb__cor_merge.
6533 	 */
6534 	rsb_err_t errval = RSB_ERR_NO_ERROR;
6535 	void *VB = NULL, *VC = NULL, *VT = NULL;
6536 	rsb_coo_idx_t * IB = NULL, *JB = NULL;
6537 	rsb_coo_idx_t * IC = NULL, *JC = NULL;
6538 	rsb_coo_idx_t * IT = NULL, *JT = NULL;
6539 	rsb_nnz_idx_t bi = 0, ci = 0, ti = 0;
6540 	rsb_nnz_idx_t b0 = 0, c0 = 0, t0 = 0;
6541 	rsb_nnz_idx_t onz = 0;
6542 	struct rsb_coo_matrix_t coo;
6543 	size_t es = RSB_SIZEOF(typecode);
6544 
6545 	if( nnzB == 0 || nnzC == 0 )
6546 	{
6547 		goto ret;
6548 	}
6549 
6550 	b0 = offB;
6551 	c0 = offB + nnzB;
6552 	VB = RSB_TYPED_OFF_PTR(typecode,VA,b0);
6553 	VC = RSB_TYPED_OFF_PTR(typecode,VA,c0);
6554 	IB = IA + b0;
6555 	IC = IA + c0;
6556 	JB = JA + b0;
6557 	JC = JA + c0;
6558 
6559 	RSB_BZERO_P(&coo);
6560 	coo.nnz = nnzB + nnzC;
6561 	coo.typecode = typecode;
6562 
6563 	if( coop && coop->nnz)
6564 	{
6565 		coo = *coop;
6566 		coo.nnz = nnzB + nnzC; /* necessary */
6567 	}
6568 	else
6569 	{
6570 		if( NULL == rsb__allocate_coo_matrix_t(&coo) )
6571 			goto err;
6572 	}
6573 
6574 	IT = coo.IA;
6575 	JT = coo.JA;
6576 	VT = coo.VA;
6577 
6578 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6579 	if(typecode == RSB_NUMERICAL_TYPE_DOUBLE )
6580 	{
6581 	double * vT = VT;
6582 	double * vB = VB;
6583 	double * vC = VC;
6584 
6585 again_double:
6586 	t0 = ti;
6587 
6588        	if   ( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6589 	{
6590 		IT[ti] = IB[bi];
6591 		JT[ti] = JB[bi];
6592 		vT[ti] = vB[bi];
6593 		++bi,++ti;
6594 	}
6595 
6596        	while( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6597 	{
6598 		--ti;
6599 		vT[ti] += vB[bi];
6600 		++bi;
6601 		++ti;
6602 		++onz;
6603 	}
6604 
6605 	/* FIXME: this works as RSB_FLAG_DUPLICATES_SUM but should support either merge, last, first, ...  */
6606 	t0 = ti;
6607        	if   ( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) )
6608 	{
6609 		IT[ti] = IB[bi];
6610 		JT[ti] = JB[bi];
6611 		vT[ti] = vB[bi] + vC[ci];
6612 		++bi,++ci,++ti;
6613 		++onz;
6614 	}
6615 
6616        	while( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6617 	{
6618 		--ti;
6619 		vT[ti] += vB[bi] + vC[ci];
6620 		++bi;
6621 		++ci;
6622 		++ti;
6623 		++onz;
6624 	}
6625 
6626 	t0 = ti;
6627        	if   ( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6628 	{
6629 		IT[ti] = IC[ci];
6630 		JT[ti] = JC[ci];
6631 		vT[ti] = vC[ci];
6632 		++ci,++ti;
6633 	}
6634 
6635        	while( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6636 	{
6637 		--ti;
6638 		vT[ti] += vC[ci];
6639 		++ci;
6640 		++ti;
6641 		++onz;
6642 	}
6643 
6644 	if( ci < nnzC && bi < nnzB )
6645 		goto again_double;
6646 
6647 again_once_double:
6648 
6649        	if   ( bi<nnzB && ci==nnzC )
6650 	{
6651 		IT[ti] = IB[bi];
6652 		JT[ti] = JB[bi];
6653 		vT[ti] = vB[bi];
6654 		++bi,++ti;
6655 	}
6656 
6657        	while( bi<nnzB && ci==nnzC && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6658 	{
6659 		--ti;
6660 		vT[ti] += vB[bi];
6661 		++bi;
6662 		++ti;
6663 		++onz;
6664 	}
6665 
6666        	if   ( ci<nnzC && bi==nnzB )
6667 	{
6668 		IT[ti] = IC[ci];
6669 		JT[ti] = JC[ci];
6670 		vT[ti] = vC[ci];
6671 		++ci,++ti;
6672 	}
6673 
6674        	while( ci<nnzC && bi==nnzB && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6675 	{
6676 		--ti;
6677 		vT[ti]+= vC[ci];
6678 		++ci;
6679 		++ti;
6680 		++onz;
6681 	}
6682 
6683 	if( ci < nnzC || bi < nnzB )
6684 		goto again_once_double;
6685 	}
6686 	else
6687 #endif
6688 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6689 	if(typecode == RSB_NUMERICAL_TYPE_FLOAT )
6690 	{
6691 	float * vT = VT;
6692 	float * vB = VB;
6693 	float * vC = VC;
6694 
6695 again_float:
6696 	t0 = ti;
6697 
6698        	if   ( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6699 	{
6700 		IT[ti] = IB[bi];
6701 		JT[ti] = JB[bi];
6702 		vT[ti] = vB[bi];
6703 		++bi,++ti;
6704 	}
6705 
6706        	while( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6707 	{
6708 		--ti;
6709 		vT[ti] += vB[bi];
6710 		++bi;
6711 		++ti;
6712 		++onz;
6713 	}
6714 
6715 	/* FIXME: this works as RSB_FLAG_DUPLICATES_SUM but should support either merge, last, first, ...  */
6716 	t0 = ti;
6717        	if   ( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) )
6718 	{
6719 		IT[ti] = IB[bi];
6720 		JT[ti] = JB[bi];
6721 		vT[ti] = vB[bi] + vC[ci];
6722 		++bi,++ci,++ti;
6723 		++onz;
6724 	}
6725 
6726        	while( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6727 	{
6728 		--ti;
6729 		vT[ti] += vB[bi] + vC[ci];
6730 		++bi;
6731 		++ci;
6732 		++ti;
6733 		++onz;
6734 	}
6735 
6736 	t0 = ti;
6737        	if   ( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6738 	{
6739 		IT[ti] = IC[ci];
6740 		JT[ti] = JC[ci];
6741 		vT[ti] = vC[ci];
6742 		++ci,++ti;
6743 	}
6744 
6745        	while( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6746 	{
6747 		--ti;
6748 		vT[ti] += vC[ci];
6749 		++ci;
6750 		++ti;
6751 		++onz;
6752 	}
6753 
6754 	if( ci < nnzC && bi < nnzB )
6755 		goto again_float;
6756 
6757 again_once_float:
6758 
6759        	if   ( bi<nnzB && ci==nnzC )
6760 	{
6761 		IT[ti] = IB[bi];
6762 		JT[ti] = JB[bi];
6763 		vT[ti] = vB[bi];
6764 		++bi,++ti;
6765 	}
6766 
6767        	while( bi<nnzB && ci==nnzC && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6768 	{
6769 		--ti;
6770 		vT[ti] += vB[bi];
6771 		++bi;
6772 		++ti;
6773 		++onz;
6774 	}
6775 
6776        	if   ( ci<nnzC && bi==nnzB )
6777 	{
6778 		IT[ti] = IC[ci];
6779 		JT[ti] = JC[ci];
6780 		vT[ti] = vC[ci];
6781 		++ci,++ti;
6782 	}
6783 
6784        	while( ci<nnzC && bi==nnzB && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6785 	{
6786 		--ti;
6787 		vT[ti]+= vC[ci];
6788 		++ci;
6789 		++ti;
6790 		++onz;
6791 	}
6792 
6793 	if( ci < nnzC || bi < nnzB )
6794 		goto again_once_float;
6795 	}
6796 	else
6797 #endif
6798 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6799 	if(typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6800 	{
6801 	float complex * vT = VT;
6802 	float complex * vB = VB;
6803 	float complex * vC = VC;
6804 
6805 again_float_complex:
6806 	t0 = ti;
6807 
6808        	if   ( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6809 	{
6810 		IT[ti] = IB[bi];
6811 		JT[ti] = JB[bi];
6812 		vT[ti] = vB[bi];
6813 		++bi,++ti;
6814 	}
6815 
6816        	while( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6817 	{
6818 		--ti;
6819 		vT[ti] += vB[bi];
6820 		++bi;
6821 		++ti;
6822 		++onz;
6823 	}
6824 
6825 	/* FIXME: this works as RSB_FLAG_DUPLICATES_SUM but should support either merge, last, first, ...  */
6826 	t0 = ti;
6827        	if   ( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) )
6828 	{
6829 		IT[ti] = IB[bi];
6830 		JT[ti] = JB[bi];
6831 		vT[ti] = vB[bi] + vC[ci];
6832 		++bi,++ci,++ti;
6833 		++onz;
6834 	}
6835 
6836        	while( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6837 	{
6838 		--ti;
6839 		vT[ti] += vB[bi] + vC[ci];
6840 		++bi;
6841 		++ci;
6842 		++ti;
6843 		++onz;
6844 	}
6845 
6846 	t0 = ti;
6847        	if   ( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6848 	{
6849 		IT[ti] = IC[ci];
6850 		JT[ti] = JC[ci];
6851 		vT[ti] = vC[ci];
6852 		++ci,++ti;
6853 	}
6854 
6855        	while( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6856 	{
6857 		--ti;
6858 		vT[ti] += vC[ci];
6859 		++ci;
6860 		++ti;
6861 		++onz;
6862 	}
6863 
6864 	if( ci < nnzC && bi < nnzB )
6865 		goto again_float_complex;
6866 
6867 again_once_float_complex:
6868 
6869        	if   ( bi<nnzB && ci==nnzC )
6870 	{
6871 		IT[ti] = IB[bi];
6872 		JT[ti] = JB[bi];
6873 		vT[ti] = vB[bi];
6874 		++bi,++ti;
6875 	}
6876 
6877        	while( bi<nnzB && ci==nnzC && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6878 	{
6879 		--ti;
6880 		vT[ti] += vB[bi];
6881 		++bi;
6882 		++ti;
6883 		++onz;
6884 	}
6885 
6886        	if   ( ci<nnzC && bi==nnzB )
6887 	{
6888 		IT[ti] = IC[ci];
6889 		JT[ti] = JC[ci];
6890 		vT[ti] = vC[ci];
6891 		++ci,++ti;
6892 	}
6893 
6894        	while( ci<nnzC && bi==nnzB && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6895 	{
6896 		--ti;
6897 		vT[ti]+= vC[ci];
6898 		++ci;
6899 		++ti;
6900 		++onz;
6901 	}
6902 
6903 	if( ci < nnzC || bi < nnzB )
6904 		goto again_once_float_complex;
6905 	}
6906 	else
6907 #endif
6908 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6909 	if(typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6910 	{
6911 	double complex * vT = VT;
6912 	double complex * vB = VB;
6913 	double complex * vC = VC;
6914 
6915 again_double_complex:
6916 	t0 = ti;
6917 
6918        	if   ( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6919 	{
6920 		IT[ti] = IB[bi];
6921 		JT[ti] = JB[bi];
6922 		vT[ti] = vB[bi];
6923 		++bi,++ti;
6924 	}
6925 
6926        	while( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6927 	{
6928 		--ti;
6929 		vT[ti] += vB[bi];
6930 		++bi;
6931 		++ti;
6932 		++onz;
6933 	}
6934 
6935 	/* FIXME: this works as RSB_FLAG_DUPLICATES_SUM but should support either merge, last, first, ...  */
6936 	t0 = ti;
6937        	if   ( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) )
6938 	{
6939 		IT[ti] = IB[bi];
6940 		JT[ti] = JB[bi];
6941 		vT[ti] = vB[bi] + vC[ci];
6942 		++bi,++ci,++ti;
6943 		++onz;
6944 	}
6945 
6946        	while( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6947 	{
6948 		--ti;
6949 		vT[ti] += vB[bi] + vC[ci];
6950 		++bi;
6951 		++ci;
6952 		++ti;
6953 		++onz;
6954 	}
6955 
6956 	t0 = ti;
6957        	if   ( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6958 	{
6959 		IT[ti] = IC[ci];
6960 		JT[ti] = JC[ci];
6961 		vT[ti] = vC[ci];
6962 		++ci,++ti;
6963 	}
6964 
6965        	while( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6966 	{
6967 		--ti;
6968 		vT[ti] += vC[ci];
6969 		++ci;
6970 		++ti;
6971 		++onz;
6972 	}
6973 
6974 	if( ci < nnzC && bi < nnzB )
6975 		goto again_double_complex;
6976 
6977 again_once_double_complex:
6978 
6979        	if   ( bi<nnzB && ci==nnzC )
6980 	{
6981 		IT[ti] = IB[bi];
6982 		JT[ti] = JB[bi];
6983 		vT[ti] = vB[bi];
6984 		++bi,++ti;
6985 	}
6986 
6987        	while( bi<nnzB && ci==nnzC && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6988 	{
6989 		--ti;
6990 		vT[ti] += vB[bi];
6991 		++bi;
6992 		++ti;
6993 		++onz;
6994 	}
6995 
6996        	if   ( ci<nnzC && bi==nnzB )
6997 	{
6998 		IT[ti] = IC[ci];
6999 		JT[ti] = JC[ci];
7000 		vT[ti] = vC[ci];
7001 		++ci,++ti;
7002 	}
7003 
7004        	while( ci<nnzC && bi==nnzB && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
7005 	{
7006 		--ti;
7007 		vT[ti]+= vC[ci];
7008 		++ci;
7009 		++ti;
7010 		++onz;
7011 	}
7012 
7013 	if( ci < nnzC || bi < nnzB )
7014 		goto again_once_double_complex;
7015 	}
7016 	else
7017 #endif
7018 		errval = RSB_ERR_INTERNAL_ERROR;
7019 
7020 	coo.nnz -= onz;
7021 	RSB_COA_MEMCPY(IA,IT,offB,0,(coo.nnz));
7022 	RSB_COA_MEMCPY(JA,JT,offB,0,(coo.nnz));
7023 	if(wp)
7024 	{
7025 		RSB_A_MEMCPY_parallel(  VA,VT,offB,0,(coo.nnz),es);
7026 	}
7027 	else
7028 	{
7029 		RSB_A_MEMCPY(  VA,VT,offB,0,(coo.nnz),es);
7030 	}
7031 	RSB_ASSERT(rsb__util_is_coo_array_sorted_up_partial_order(IA,coo.nnz));
7032 	goto done;
7033 err:
7034 	errval = RSB_ERR_ENOMEM;
7035 done:
7036 	if( coop && coop->nnz)
7037 		;
7038 	else
7039 		rsb__destroy_coo_matrix_t(&coo);
7040 	RSB_ASSIGN_IF(onzp,onz);
7041 ret:
7042 	return errval;
7043 }
7044 
rsb__do_copy_converted_scaled(const void * RSB_RESTRICT src,void * RSB_RESTRICT dst,const void * RSB_RESTRICT alphap,rsb_type_t stype,rsb_type_t dtype,size_t nnz,rsb_trans_t transA)7045 rsb_err_t rsb__do_copy_converted_scaled(const void *RSB_RESTRICT  src, void *RSB_RESTRICT dst, const void *RSB_RESTRICT  alphap, rsb_type_t stype,rsb_type_t dtype, size_t nnz, rsb_trans_t transA)
7046 {
7047 	/*!
7048 	 * Copies scaled and conj-transposed.
7049 	 * alpha according to src code type.
7050 	 * \return \rsberrcodemsg
7051 	 * */
7052 	rsb_nnz_idx_t nzi;
7053 
7054 	if((!dst) || (!src))
7055 		return RSB_ERR_BADARGS;
7056 
7057 	if( stype == RSB_NUMERICAL_TYPE_DOUBLE  && dtype == RSB_NUMERICAL_TYPE_DOUBLE  )
7058 	{
7059 		const double alpha = alphap?*(double*)alphap:((double)(1.0));
7060 		const double*tsrc = src;
7061 		double*tdst = dst;
7062 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double)(alpha*tsrc[nzi]);
7063 	}
7064 	else
7065 	if( stype == RSB_NUMERICAL_TYPE_DOUBLE  && dtype == RSB_NUMERICAL_TYPE_FLOAT  )
7066 	{
7067 		const double alpha = alphap?*(double*)alphap:((double)(1.0));
7068 		const double*tsrc = src;
7069 		float*tdst = dst;
7070 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float)(alpha*tsrc[nzi]);
7071 	}
7072 	else
7073 	if( stype == RSB_NUMERICAL_TYPE_DOUBLE  && dtype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7074 	{
7075 		const double alpha = alphap?*(double*)alphap:((double)(1.0));
7076 		const double*tsrc = src;
7077 		float complex*tdst = dst;
7078 		if(RSB_DOES_CONJUGATE(transA))
7079 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*conjf(tsrc[nzi])) + 0*I;
7080 		else
7081 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*tsrc[nzi]) + 0*I;
7082 	}
7083 	else
7084 	if( stype == RSB_NUMERICAL_TYPE_DOUBLE  && dtype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7085 	{
7086 		const double alpha = alphap?*(double*)alphap:((double)(1.0));
7087 		const double*tsrc = src;
7088 		double complex*tdst = dst;
7089 		if(RSB_DOES_CONJUGATE(transA))
7090 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*conj(tsrc[nzi])) + 0*I;
7091 		else
7092 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*tsrc[nzi]) + 0*I;
7093 	}
7094 	else
7095 	if( stype == RSB_NUMERICAL_TYPE_FLOAT  && dtype == RSB_NUMERICAL_TYPE_DOUBLE  )
7096 	{
7097 		const float alpha = alphap?*(float*)alphap:((float)(1.0));
7098 		const float*tsrc = src;
7099 		double*tdst = dst;
7100 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double)(alpha*tsrc[nzi]);
7101 	}
7102 	else
7103 	if( stype == RSB_NUMERICAL_TYPE_FLOAT  && dtype == RSB_NUMERICAL_TYPE_FLOAT  )
7104 	{
7105 		const float alpha = alphap?*(float*)alphap:((float)(1.0));
7106 		const float*tsrc = src;
7107 		float*tdst = dst;
7108 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float)(alpha*tsrc[nzi]);
7109 	}
7110 	else
7111 	if( stype == RSB_NUMERICAL_TYPE_FLOAT  && dtype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7112 	{
7113 		const float alpha = alphap?*(float*)alphap:((float)(1.0));
7114 		const float*tsrc = src;
7115 		float complex*tdst = dst;
7116 		if(RSB_DOES_CONJUGATE(transA))
7117 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*conjf(tsrc[nzi])) + 0*I;
7118 		else
7119 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*tsrc[nzi]) + 0*I;
7120 	}
7121 	else
7122 	if( stype == RSB_NUMERICAL_TYPE_FLOAT  && dtype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7123 	{
7124 		const float alpha = alphap?*(float*)alphap:((float)(1.0));
7125 		const float*tsrc = src;
7126 		double complex*tdst = dst;
7127 		if(RSB_DOES_CONJUGATE(transA))
7128 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*conj(tsrc[nzi])) + 0*I;
7129 		else
7130 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*tsrc[nzi]) + 0*I;
7131 	}
7132 	else
7133 	if( stype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  && dtype == RSB_NUMERICAL_TYPE_DOUBLE  )
7134 	{
7135 		const float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
7136 		const float complex*tsrc = src;
7137 		double*tdst = dst;
7138 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = crealf((double)(alpha*tsrc[nzi]));
7139 	}
7140 	else
7141 	if( stype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  && dtype == RSB_NUMERICAL_TYPE_FLOAT  )
7142 	{
7143 		const float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
7144 		const float complex*tsrc = src;
7145 		float*tdst = dst;
7146 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = crealf((float)(alpha*tsrc[nzi]));
7147 	}
7148 	else
7149 	if( stype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  && dtype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7150 	{
7151 		const float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
7152 		const float complex*tsrc = src;
7153 		float complex*tdst = dst;
7154 		if(RSB_DOES_CONJUGATE(transA))
7155 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*conjf(tsrc[nzi]));
7156 		else
7157 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*tsrc[nzi]);
7158 	}
7159 	else
7160 	if( stype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  && dtype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7161 	{
7162 		const float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
7163 		const float complex*tsrc = src;
7164 		double complex*tdst = dst;
7165 		if(RSB_DOES_CONJUGATE(transA))
7166 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*conj(tsrc[nzi]));
7167 		else
7168 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*tsrc[nzi]);
7169 	}
7170 	else
7171 	if( stype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  && dtype == RSB_NUMERICAL_TYPE_DOUBLE  )
7172 	{
7173 		const double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
7174 		const double complex*tsrc = src;
7175 		double*tdst = dst;
7176 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = creal((double)(alpha*tsrc[nzi]));
7177 	}
7178 	else
7179 	if( stype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  && dtype == RSB_NUMERICAL_TYPE_FLOAT  )
7180 	{
7181 		const double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
7182 		const double complex*tsrc = src;
7183 		float*tdst = dst;
7184 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = creal((float)(alpha*tsrc[nzi]));
7185 	}
7186 	else
7187 	if( stype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  && dtype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7188 	{
7189 		const double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
7190 		const double complex*tsrc = src;
7191 		float complex*tdst = dst;
7192 		if(RSB_DOES_CONJUGATE(transA))
7193 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*conjf(tsrc[nzi]));
7194 		else
7195 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*tsrc[nzi]);
7196 	}
7197 	else
7198 	if( stype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  && dtype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7199 	{
7200 		const double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
7201 		const double complex*tsrc = src;
7202 		double complex*tdst = dst;
7203 		if(RSB_DOES_CONJUGATE(transA))
7204 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*conj(tsrc[nzi]));
7205 		else
7206 			for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*tsrc[nzi]);
7207 	}
7208 	else
7209 	return RSB_ERR_UNSUPPORTED_TYPE	;
7210 	return RSB_ERR_NO_ERROR;
7211 }
7212 
rsb_util_csc2csr(const void * RSB_RESTRICT VA,const rsb_coo_idx_t * RSB_RESTRICT IA,const rsb_coo_idx_t * RSB_RESTRICT JA,void * RSB_RESTRICT oVA,rsb_coo_idx_t * RSB_RESTRICT oIA,rsb_coo_idx_t * RSB_RESTRICT oJA,rsb_coo_idx_t m,rsb_coo_idx_t k,rsb_nnz_idx_t nnz,rsb_type_t typecode,const rsb_coo_idx_t offi,const rsb_coo_idx_t offo,rsb_flags_t * flagsp)7213 rsb_err_t rsb_util_csc2csr(const void *RSB_RESTRICT VA, const rsb_coo_idx_t * RSB_RESTRICT IA, const rsb_coo_idx_t * RSB_RESTRICT JA, void *RSB_RESTRICT oVA, rsb_coo_idx_t * RSB_RESTRICT oIA, rsb_coo_idx_t * RSB_RESTRICT oJA, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_nnz_idx_t nnz, rsb_type_t typecode, const rsb_coo_idx_t offi, const rsb_coo_idx_t offo, rsb_flags_t*flagsp)
7214 {
7215 	/*!
7216 	 * */
7217 	rsb_nnz_idx_t nzi = 0, nzo;
7218 	rsb_coo_idx_t nr, nc;
7219 	rsb_flags_t flags = RSB_FLAG_NOFLAGS;
7220 	rsb_bool_t islowtri = RSB_BOOL_TRUE, isupptri = RSB_BOOL_TRUE;
7221 	rsb_nnz_idx_t lowtrin = 0, upptrin = 0;
7222 
7223 	RSB_BZERO(oIA, sizeof(*oIA)*(m+1));
7224 	oIA[0] = offo;
7225 	for(nzi=0;nzi<nnz;++nzi)
7226 		oIA[IA[nzi]-offi+1]++;
7227 	for(nr=0;nr<m;++nr)
7228 		oIA[nr+1]+=oIA[nr];
7229 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
7230 	for(nc=0;nc<k;++nc)
7231 	for(nzi = JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7232 	{
7233 		nzo = oIA[IA[nzi]-offi]++;
7234 		oJA[nzo] = nc+offo;
7235 		((double*)oVA)[nzo] = ((const double*)VA)[nzi];
7236 	}
7237 	else
7238 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
7239 	for(nc=0;nc<k;++nc)
7240 	for(nzi = JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7241 	{
7242 		nzo = oIA[IA[nzi]-offi]++;
7243 		oJA[nzo] = nc+offo;
7244 		((float*)oVA)[nzo] = ((const float*)VA)[nzi];
7245 	}
7246 	else
7247 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7248 	for(nc=0;nc<k;++nc)
7249 	for(nzi = JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7250 	{
7251 		nzo = oIA[IA[nzi]-offi]++;
7252 		oJA[nzo] = nc+offo;
7253 		((float complex*)oVA)[nzo] = ((const float complex*)VA)[nzi];
7254 	}
7255 	else
7256 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7257 	for(nc=0;nc<k;++nc)
7258 	for(nzi = JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7259 	{
7260 		nzo = oIA[IA[nzi]-offi]++;
7261 		oJA[nzo] = nc+offo;
7262 		((double complex*)oVA)[nzo] = ((const double complex*)VA)[nzi];
7263 	}
7264 	else
7265 		return RSB_ERR_UNSUPPORTED_TYPE	;
7266 	for(nc=0;nc<k;++nc)
7267 	for(nzi=JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7268 	{
7269 		oIA[IA[nzi]-offi]--;
7270 		if(IA[nzi]-offi>nc)
7271 			lowtrin++;
7272 		else
7273 			if(IA[nzi]-offi<nc)
7274 				upptrin++;
7275 	}
7276 	if(upptrin)
7277 		islowtri = RSB_BOOL_FALSE;
7278 	if(lowtrin)
7279 		isupptri = RSB_BOOL_FALSE;
7280 	if(isupptri)
7281 		RSB_DO_FLAG_ADD(flags,RSB_FLAG_UPPER);
7282 	if(islowtri)
7283 		RSB_DO_FLAG_ADD(flags,RSB_FLAG_LOWER);
7284 	if( RSB_XOR(upptrin,lowtrin) )
7285 		RSB_DO_FLAG_ADD(flags,RSB_FLAG_TRIANGULAR);
7286 	if(*flagsp) RSB_DO_FLAG_ADD(*flagsp,flags);
7287 	return RSB_ERR_NO_ERROR;
7288 }
7289 
rsb_util_coo_copy_and_stats(const void * RSB_RESTRICT VA,const rsb_coo_idx_t * RSB_RESTRICT IA,const rsb_coo_idx_t * RSB_RESTRICT JA,void * RSB_RESTRICT oVA,rsb_coo_idx_t * RSB_RESTRICT oIA,rsb_coo_idx_t * RSB_RESTRICT oJA,rsb_coo_idx_t * m,rsb_coo_idx_t * k,const rsb_nnz_idx_t nnz,const rsb_type_t typecode,const rsb_coo_idx_t offi,const rsb_coo_idx_t offo,rsb_flags_t iflags,rsb_flags_t * flagsp)7290 rsb_err_t rsb_util_coo_copy_and_stats(const void *RSB_RESTRICT VA, const rsb_coo_idx_t * RSB_RESTRICT IA, const rsb_coo_idx_t * RSB_RESTRICT JA, void *RSB_RESTRICT oVA, rsb_coo_idx_t * RSB_RESTRICT oIA, rsb_coo_idx_t * RSB_RESTRICT oJA, rsb_coo_idx_t*m, rsb_coo_idx_t*k, const rsb_nnz_idx_t nnz, const rsb_type_t typecode, const rsb_coo_idx_t offi, const rsb_coo_idx_t offo, rsb_flags_t iflags, rsb_flags_t*flagsp)
7291 {
7292 	/*!
7293          * FIXME: unfinished! shall support also typecode-based zeros removal
7294 	 * */
7295 	rsb_nnz_idx_t nzi = 0;
7296 	rsb_coo_idx_t maxi = 0,maxj = 0;
7297 	rsb_bool_t islowtri = RSB_BOOL_TRUE,isupptri = RSB_BOOL_TRUE;
7298 	rsb_flags_t flags = RSB_FLAG_NOFLAGS;
7299 	rsb_nnz_idx_t lowtrin = 0,upptrin = 0;
7300 
7301 	if(nnz<1)
7302 		goto done;
7303 
7304 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
7305 {
7306 	rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7307 	maxi = i, maxj = j;
7308 	((double*)oVA)[nzi] = ((double*)VA)[nzi];
7309 	oIA[nzi] = i-offi+offo;
7310 	oJA[nzi] = j-offi+offo;
7311 	lowtrin |= (i>j), upptrin |= (i<j);
7312 	for(nzi=1;RSB_LIKELY(nzi<nnz);++nzi)
7313 	{
7314 		rsb_coo_idx_t i = IA[nzi],j = JA[nzi];
7315 		maxi = RSB_MAX(maxi, i);
7316 		maxj = RSB_MAX(maxj, j);
7317 		((double*)oVA)[nzi] = ((double*)VA)[nzi];
7318 		oIA[nzi] = i-offi+offo;
7319 		oJA[nzi] = j-offi+offo;
7320 		lowtrin |= (i>j);
7321 		upptrin |= (i<j);
7322 	}
7323 }
7324 	else
7325 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
7326 {
7327 	rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7328 	maxi = i, maxj = j;
7329 	((float*)oVA)[nzi] = ((float*)VA)[nzi];
7330 	oIA[nzi] = i-offi+offo;
7331 	oJA[nzi] = j-offi+offo;
7332 	lowtrin |= (i>j), upptrin |= (i<j);
7333 	for(nzi=1;RSB_LIKELY(nzi<nnz);++nzi)
7334 	{
7335 		rsb_coo_idx_t i = IA[nzi],j = JA[nzi];
7336 		maxi = RSB_MAX(maxi, i);
7337 		maxj = RSB_MAX(maxj, j);
7338 		((float*)oVA)[nzi] = ((float*)VA)[nzi];
7339 		oIA[nzi] = i-offi+offo;
7340 		oJA[nzi] = j-offi+offo;
7341 		lowtrin |= (i>j);
7342 		upptrin |= (i<j);
7343 	}
7344 }
7345 	else
7346 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7347 {
7348 	rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7349 	maxi = i, maxj = j;
7350 	((float complex*)oVA)[nzi] = ((float complex*)VA)[nzi];
7351 	oIA[nzi] = i-offi+offo;
7352 	oJA[nzi] = j-offi+offo;
7353 	lowtrin |= (i>j), upptrin |= (i<j);
7354 	for(nzi=1;RSB_LIKELY(nzi<nnz);++nzi)
7355 	{
7356 		rsb_coo_idx_t i = IA[nzi],j = JA[nzi];
7357 		maxi = RSB_MAX(maxi, i);
7358 		maxj = RSB_MAX(maxj, j);
7359 		((float complex*)oVA)[nzi] = ((float complex*)VA)[nzi];
7360 		oIA[nzi] = i-offi+offo;
7361 		oJA[nzi] = j-offi+offo;
7362 		lowtrin |= (i>j);
7363 		upptrin |= (i<j);
7364 	}
7365 }
7366 	else
7367 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7368 {
7369 	rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7370 	maxi = i, maxj = j;
7371 	((double complex*)oVA)[nzi] = ((double complex*)VA)[nzi];
7372 	oIA[nzi] = i-offi+offo;
7373 	oJA[nzi] = j-offi+offo;
7374 	lowtrin |= (i>j), upptrin |= (i<j);
7375 	for(nzi=1;RSB_LIKELY(nzi<nnz);++nzi)
7376 	{
7377 		rsb_coo_idx_t i = IA[nzi],j = JA[nzi];
7378 		maxi = RSB_MAX(maxi, i);
7379 		maxj = RSB_MAX(maxj, j);
7380 		((double complex*)oVA)[nzi] = ((double complex*)VA)[nzi];
7381 		oIA[nzi] = i-offi+offo;
7382 		oJA[nzi] = j-offi+offo;
7383 		lowtrin |= (i>j);
7384 		upptrin |= (i<j);
7385 	}
7386 }
7387 	else
7388 		return RSB_ERR_UNSUPPORTED_TYPE;
7389 	if(upptrin)
7390 		islowtri = RSB_BOOL_FALSE;
7391 	if(lowtrin)
7392 		isupptri = RSB_BOOL_FALSE;
7393 	if(isupptri)
7394 		RSB_DO_FLAG_ADD(flags,RSB_FLAG_UPPER);
7395 	if(islowtri)
7396 		RSB_DO_FLAG_ADD(flags,RSB_FLAG_LOWER);
7397 	if( RSB_XOR(upptrin,lowtrin) )
7398 		RSB_DO_FLAG_ADD(flags,RSB_FLAG_TRIANGULAR);
7399 	if(flagsp) RSB_DO_FLAG_ADD(*flagsp,flags);
7400 	if(m) *m = maxi+1;
7401 	if(k) *k = maxj+1;
7402 done:
7403 	return RSB_ERR_NO_ERROR;
7404 }
7405 
rsb_util_coo_copy(const void * RSB_RESTRICT VA,const rsb_coo_idx_t * RSB_RESTRICT IA,const rsb_coo_idx_t * RSB_RESTRICT JA,void * RSB_RESTRICT oVA,rsb_coo_idx_t * RSB_RESTRICT oIA,rsb_coo_idx_t * RSB_RESTRICT oJA,const rsb_nnz_idx_t nnz,const rsb_type_t typecode,const rsb_coo_idx_t offi,const rsb_coo_idx_t offo)7406 rsb_err_t rsb_util_coo_copy(const void *RSB_RESTRICT VA, const rsb_coo_idx_t * RSB_RESTRICT IA, const rsb_coo_idx_t * RSB_RESTRICT JA, void *RSB_RESTRICT oVA, rsb_coo_idx_t * RSB_RESTRICT oIA, rsb_coo_idx_t * RSB_RESTRICT oJA, const rsb_nnz_idx_t nnz, const rsb_type_t typecode, const rsb_coo_idx_t offi, const rsb_coo_idx_t offo)
7407 {
7408 	/*!
7409          * FIXME: unfinished! shall support also typecode-based zeros removal
7410 	 * */
7411 	rsb_nnz_idx_t nzi = 0;
7412 
7413 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
7414 {
7415 	for(nzi=0;RSB_LIKELY(nzi<nnz);++nzi)
7416 	{
7417 		rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7418 		((double*)oVA)[nzi] = ((double*)VA)[nzi];
7419 		oIA[nzi] = i-offi+offo;
7420 		oJA[nzi] = j-offi+offo;
7421 	}
7422 }
7423 	else
7424 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
7425 {
7426 	for(nzi=0;RSB_LIKELY(nzi<nnz);++nzi)
7427 	{
7428 		rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7429 		((float*)oVA)[nzi] = ((float*)VA)[nzi];
7430 		oIA[nzi] = i-offi+offo;
7431 		oJA[nzi] = j-offi+offo;
7432 	}
7433 }
7434 	else
7435 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7436 {
7437 	for(nzi=0;RSB_LIKELY(nzi<nnz);++nzi)
7438 	{
7439 		rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7440 		((float complex*)oVA)[nzi] = ((float complex*)VA)[nzi];
7441 		oIA[nzi] = i-offi+offo;
7442 		oJA[nzi] = j-offi+offo;
7443 	}
7444 }
7445 	else
7446 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7447 {
7448 	for(nzi=0;RSB_LIKELY(nzi<nnz);++nzi)
7449 	{
7450 		rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7451 		((double complex*)oVA)[nzi] = ((double complex*)VA)[nzi];
7452 		oIA[nzi] = i-offi+offo;
7453 		oJA[nzi] = j-offi+offo;
7454 	}
7455 }
7456 	else
7457 	return RSB_ERR_UNSUPPORTED_TYPE	;
7458 	return RSB_ERR_NO_ERROR;
7459 }
7460 
7461 /* sparse blas level 1 equivalent functions */
7462 
rsb__BLAS_Xusdot(const rsb_type_t typecode,const enum blas_conj_type conj_arg,const rsb_blas_int_t nz,const void * x,const rsb_blas_int_t * indx,const void * y,const rsb_blas_int_t incy,void * r,const enum blas_base_type index_base)7463 int rsb__BLAS_Xusdot(const rsb_type_t typecode, const enum blas_conj_type conj_arg, const rsb_blas_int_t nz, const void*x, const rsb_blas_int_t*indx, const void*y, const rsb_blas_int_t incy, void*r, const enum blas_base_type index_base)
7464 {
7465 	/*!
7466 		\rsb_spblasl1_dot_msg
7467 		\rsb_warn_untested_msg
7468 	*/
7469 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
7470 {
7471 	double*xa = (double*)x;
7472 	double*ya = (double*)y;
7473 	double*rp = (double*)r;
7474 	double ac = ((double)(0));
7475 	rsb_blas_int_t nzi, xi;
7476 	if( index_base == blas_one_base )
7477 		ya-=incy;
7478 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7479 	{
7480 		xi = indx[nzi];
7481     		ac += xa[nzi] * ya[xi*incy];
7482 	}
7483 	RSB_SET_IF_NOT_NULL(rp,ac);
7484 }
7485 	else
7486 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
7487 {
7488 	float*xa = (float*)x;
7489 	float*ya = (float*)y;
7490 	float*rp = (float*)r;
7491 	float ac = ((float)(0));
7492 	rsb_blas_int_t nzi, xi;
7493 	if( index_base == blas_one_base )
7494 		ya-=incy;
7495 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7496 	{
7497 		xi = indx[nzi];
7498     		ac += xa[nzi] * ya[xi*incy];
7499 	}
7500 	RSB_SET_IF_NOT_NULL(rp,ac);
7501 }
7502 	else
7503 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7504 {
7505 	float complex*xa = (float complex*)x;
7506 	float complex*ya = (float complex*)y;
7507 	float complex*rp = (float complex*)r;
7508 	float complex ac = ((float complex)(0));
7509 	rsb_blas_int_t nzi, xi;
7510 	if( index_base == blas_one_base )
7511 		ya-=incy;
7512 	if( conj_arg == blas_conj )
7513 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7514 	{
7515 		xi = indx[nzi];
7516     		ac += conjf(xa[nzi]) * ya[xi*incy];
7517 	}
7518 	else
7519 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7520 	{
7521 		xi = indx[nzi];
7522     		ac += xa[nzi] * ya[xi*incy];
7523 	}
7524 	RSB_SET_IF_NOT_NULL(rp,ac);
7525 }
7526 	else
7527 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7528 {
7529 	double complex*xa = (double complex*)x;
7530 	double complex*ya = (double complex*)y;
7531 	double complex*rp = (double complex*)r;
7532 	double complex ac = ((double complex)(0));
7533 	rsb_blas_int_t nzi, xi;
7534 	if( index_base == blas_one_base )
7535 		ya-=incy;
7536 	if( conj_arg == blas_conj )
7537 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7538 	{
7539 		xi = indx[nzi];
7540     		ac += conj(xa[nzi]) * ya[xi*incy];
7541 	}
7542 	else
7543 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7544 	{
7545 		xi = indx[nzi];
7546     		ac += xa[nzi] * ya[xi*incy];
7547 	}
7548 	RSB_SET_IF_NOT_NULL(rp,ac);
7549 }
7550 	else
7551 	return RSB_ERR_UNSUPPORTED_TYPE	;
7552 	return RSB_ERR_NO_ERROR;
7553 }
7554 
rsb__BLAS_Xusaxpy(const rsb_type_t typecode,const rsb_blas_int_t nz,const void * alpha,const void * x,const rsb_blas_int_t * indx,const void * y,const rsb_blas_int_t incy,const enum blas_base_type index_base)7555 int rsb__BLAS_Xusaxpy(const rsb_type_t typecode, const rsb_blas_int_t nz, const void*alpha, const void*x, const rsb_blas_int_t*indx, const void*y, const rsb_blas_int_t incy, const enum blas_base_type index_base)
7556 {
7557 	/*!
7558 		\rsb_spblasl1_axpy_msg
7559 		\rsb_warn_untested_msg
7560 	*/
7561 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
7562 {
7563 	const double*xa = (const double*)x;
7564 	double*ya = (double*)y;
7565 	const double alphav = *(double*)alpha;
7566 	rsb_blas_int_t nzi, xi;
7567 	if( index_base == blas_one_base )
7568 		ya-=incy;
7569 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7570 	{
7571 		xi = indx[nzi];
7572 		ya[nzi*incy] += alphav*xa[xi];
7573 	}
7574 }
7575 	else
7576 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
7577 {
7578 	const float*xa = (const float*)x;
7579 	float*ya = (float*)y;
7580 	const float alphav = *(float*)alpha;
7581 	rsb_blas_int_t nzi, xi;
7582 	if( index_base == blas_one_base )
7583 		ya-=incy;
7584 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7585 	{
7586 		xi = indx[nzi];
7587 		ya[nzi*incy] += alphav*xa[xi];
7588 	}
7589 }
7590 	else
7591 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7592 {
7593 	const float complex*xa = (const float complex*)x;
7594 	float complex*ya = (float complex*)y;
7595 	const float complex alphav = *(float complex*)alpha;
7596 	rsb_blas_int_t nzi, xi;
7597 	if( index_base == blas_one_base )
7598 		ya-=incy;
7599 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7600 	{
7601 		xi = indx[nzi];
7602 		ya[nzi*incy] += alphav*xa[xi];
7603 	}
7604 }
7605 	else
7606 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7607 {
7608 	const double complex*xa = (const double complex*)x;
7609 	double complex*ya = (double complex*)y;
7610 	const double complex alphav = *(double complex*)alpha;
7611 	rsb_blas_int_t nzi, xi;
7612 	if( index_base == blas_one_base )
7613 		ya-=incy;
7614 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7615 	{
7616 		xi = indx[nzi];
7617 		ya[nzi*incy] += alphav*xa[xi];
7618 	}
7619 }
7620 	else
7621 	return RSB_ERR_UNSUPPORTED_TYPE	;
7622 	return RSB_ERR_NO_ERROR;
7623 }
7624 
rsb__BLAS_Xusga(const rsb_type_t typecode,const rsb_blas_int_t nz,const void * y,const rsb_blas_int_t incy,void * x,const rsb_blas_int_t * indx,const enum blas_base_type index_base)7625 int rsb__BLAS_Xusga(const rsb_type_t typecode, const rsb_blas_int_t nz, const void*y, const rsb_blas_int_t incy, void*x, const rsb_blas_int_t*indx, const enum blas_base_type index_base)
7626 {
7627 	/*!
7628 		\rsb_spblasl1_ga_msg
7629 		\rsb_warn_untested_msg
7630 	*/
7631 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
7632 {
7633 	double*xa = (double*)x;
7634 	const double*ya = (const double*)y;
7635 	rsb_blas_int_t nzi,xi;
7636 	if( index_base == blas_one_base )
7637 		ya-=incy;
7638 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7639 	{
7640 		xi = indx[nzi];
7641     		xa[nzi] = ya[xi*incy];
7642 	}
7643 }
7644 	else
7645 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
7646 {
7647 	float*xa = (float*)x;
7648 	const float*ya = (const float*)y;
7649 	rsb_blas_int_t nzi,xi;
7650 	if( index_base == blas_one_base )
7651 		ya-=incy;
7652 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7653 	{
7654 		xi = indx[nzi];
7655     		xa[nzi] = ya[xi*incy];
7656 	}
7657 }
7658 	else
7659 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7660 {
7661 	float complex*xa = (float complex*)x;
7662 	const float complex*ya = (const float complex*)y;
7663 	rsb_blas_int_t nzi,xi;
7664 	if( index_base == blas_one_base )
7665 		ya-=incy;
7666 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7667 	{
7668 		xi = indx[nzi];
7669     		xa[nzi] = ya[xi*incy];
7670 	}
7671 }
7672 	else
7673 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7674 {
7675 	double complex*xa = (double complex*)x;
7676 	const double complex*ya = (const double complex*)y;
7677 	rsb_blas_int_t nzi,xi;
7678 	if( index_base == blas_one_base )
7679 		ya-=incy;
7680 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7681 	{
7682 		xi = indx[nzi];
7683     		xa[nzi] = ya[xi*incy];
7684 	}
7685 }
7686 	else
7687 	return RSB_ERR_UNSUPPORTED_TYPE	;
7688 	return RSB_ERR_NO_ERROR;
7689 }
7690 
rsb__BLAS_Xusgz(const rsb_type_t typecode,const rsb_blas_int_t nz,void * y,const rsb_blas_int_t incy,void * x,const rsb_blas_int_t * indx,const enum blas_base_type index_base)7691 int rsb__BLAS_Xusgz(const rsb_type_t typecode, const rsb_blas_int_t nz, void*y, const rsb_blas_int_t incy, void*x, const rsb_blas_int_t*indx, const enum blas_base_type index_base)
7692 {
7693 	/*!
7694 		\rsb_spblasl1_gz_msg
7695 		\rsb_warn_untested_msg
7696 	*/
7697 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
7698 {
7699 	double*xa = (double*)x;
7700 	double*ya = (double*)y;
7701 	rsb_blas_int_t nzi,xi;
7702 	if( index_base == blas_one_base )
7703 		ya-=incy;
7704 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7705 	{
7706 		xi = indx[nzi];
7707     		xa[nzi] = ya[xi*incy];
7708 		ya[xi*incy] = ((double)(0));
7709 	}
7710 }
7711 	else
7712 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
7713 {
7714 	float*xa = (float*)x;
7715 	float*ya = (float*)y;
7716 	rsb_blas_int_t nzi,xi;
7717 	if( index_base == blas_one_base )
7718 		ya-=incy;
7719 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7720 	{
7721 		xi = indx[nzi];
7722     		xa[nzi] = ya[xi*incy];
7723 		ya[xi*incy] = ((float)(0));
7724 	}
7725 }
7726 	else
7727 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7728 {
7729 	float complex*xa = (float complex*)x;
7730 	float complex*ya = (float complex*)y;
7731 	rsb_blas_int_t nzi,xi;
7732 	if( index_base == blas_one_base )
7733 		ya-=incy;
7734 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7735 	{
7736 		xi = indx[nzi];
7737     		xa[nzi] = ya[xi*incy];
7738 		ya[xi*incy] = ((float complex)(0));
7739 	}
7740 }
7741 	else
7742 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7743 {
7744 	double complex*xa = (double complex*)x;
7745 	double complex*ya = (double complex*)y;
7746 	rsb_blas_int_t nzi,xi;
7747 	if( index_base == blas_one_base )
7748 		ya-=incy;
7749 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7750 	{
7751 		xi = indx[nzi];
7752     		xa[nzi] = ya[xi*incy];
7753 		ya[xi*incy] = ((double complex)(0));
7754 	}
7755 }
7756 	else
7757 	return RSB_ERR_UNSUPPORTED_TYPE	;
7758 	return RSB_ERR_NO_ERROR;
7759 }
7760 
rsb__BLAS_Xussc(const rsb_type_t typecode,const rsb_blas_int_t nz,const void * x,void * y,const rsb_blas_int_t incy,const rsb_blas_int_t * indx,const enum blas_base_type index_base)7761 int rsb__BLAS_Xussc(const rsb_type_t typecode, const rsb_blas_int_t nz, const void*x, void*y, const rsb_blas_int_t incy, const rsb_blas_int_t*indx, const enum blas_base_type index_base)
7762 {
7763 	/*!
7764 		\rsb_spblasl1_sc_msg
7765 		\rsb_warn_untested_msg
7766 	*/
7767 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
7768 {
7769 	const double*xa = (const double*)x;
7770 	double*ya = (double*)y;
7771 	rsb_blas_int_t nzi,xi;
7772 	if( index_base == blas_one_base )
7773 		ya-=incy;
7774 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7775 	{
7776 		xi = indx[nzi];
7777 		ya[xi*incy] = xa[nzi];
7778 	}
7779 }
7780 	else
7781 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
7782 {
7783 	const float*xa = (const float*)x;
7784 	float*ya = (float*)y;
7785 	rsb_blas_int_t nzi,xi;
7786 	if( index_base == blas_one_base )
7787 		ya-=incy;
7788 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7789 	{
7790 		xi = indx[nzi];
7791 		ya[xi*incy] = xa[nzi];
7792 	}
7793 }
7794 	else
7795 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7796 {
7797 	const float complex*xa = (const float complex*)x;
7798 	float complex*ya = (float complex*)y;
7799 	rsb_blas_int_t nzi,xi;
7800 	if( index_base == blas_one_base )
7801 		ya-=incy;
7802 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7803 	{
7804 		xi = indx[nzi];
7805 		ya[xi*incy] = xa[nzi];
7806 	}
7807 }
7808 	else
7809 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7810 {
7811 	const double complex*xa = (const double complex*)x;
7812 	double complex*ya = (double complex*)y;
7813 	rsb_blas_int_t nzi,xi;
7814 	if( index_base == blas_one_base )
7815 		ya-=incy;
7816 	for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7817 	{
7818 		xi = indx[nzi];
7819 		ya[xi*incy] = xa[nzi];
7820 	}
7821 }
7822 	else
7823 	return RSB_ERR_UNSUPPORTED_TYPE	;
7824 	return RSB_ERR_NO_ERROR;
7825 }
7826 
7827 /* blas level 1 equivalent functions */
7828 
rsb__cblas_Xcopy(rsb_type_t typecode,rsb_nnz_idx_t n,const void * x,rsb_nnz_idx_t incx,void * y,rsb_nnz_idx_t incy)7829 rsb_err_t rsb__cblas_Xcopy(rsb_type_t typecode, rsb_nnz_idx_t n, const void * x, rsb_nnz_idx_t incx, void * y, rsb_nnz_idx_t incy)
7830 {
7831 	return rsb__xcopy_strided_typed(y,x,0,0,n,typecode,incy,incx);
7832 }
7833 
rsb__cblas_Xnrm2(rsb_type_t type,size_t n,const void * a,rsb_nnz_idx_t incA,void * c)7834 rsb_err_t rsb__cblas_Xnrm2(rsb_type_t type, size_t n, const void * a, rsb_nnz_idx_t incA, void * c){
7835 	/*!
7836 	 * c <- sqrt(sum(|a_i|^2))
7837          *
7838 	 * \param a	an array pointer
7839 	 * \param type	a valid type code
7840 	 * \param n	the input array length
7841 	 * \note see dznrm2 in BLAS
7842 	 *
7843 	 * \return \rsberrcodemsg
7844 	 * */
7845 	size_t i;
7846 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
7847 	if( type == RSB_NUMERICAL_TYPE_DOUBLE  )
7848 	{
7849 	const double*ta = a;double *tc = c,acc = ((double)(0)),tmp = ((double)(0));
7850 	{
7851 for(i=0;i+15<n;i+=16){
7852 	acc = fabs(ta[(i+0 )*incA]);tmp += acc*acc;
7853 		acc = fabs(ta[(i+1 )*incA]);tmp += acc*acc;
7854 		acc = fabs(ta[(i+2 )*incA]);tmp += acc*acc;
7855 		acc = fabs(ta[(i+3 )*incA]);tmp += acc*acc;
7856 		acc = fabs(ta[(i+4 )*incA]);tmp += acc*acc;
7857 		acc = fabs(ta[(i+5 )*incA]);tmp += acc*acc;
7858 		acc = fabs(ta[(i+6 )*incA]);tmp += acc*acc;
7859 		acc = fabs(ta[(i+7 )*incA]);tmp += acc*acc;
7860 		acc = fabs(ta[(i+8 )*incA]);tmp += acc*acc;
7861 		acc = fabs(ta[(i+9 )*incA]);tmp += acc*acc;
7862 		acc = fabs(ta[(i+10 )*incA]);tmp += acc*acc;
7863 		acc = fabs(ta[(i+11 )*incA]);tmp += acc*acc;
7864 		acc = fabs(ta[(i+12 )*incA]);tmp += acc*acc;
7865 		acc = fabs(ta[(i+13 )*incA]);tmp += acc*acc;
7866 		acc = fabs(ta[(i+14 )*incA]);tmp += acc*acc;
7867 		acc = fabs(ta[(i+15 )*incA]);tmp += acc*acc;
7868 	}
7869 for(     ;i<n;++i){ 	acc = fabs(ta[(i+0 )*incA]);tmp += acc*acc;
7870 	 }
7871 }
7872 ;
7873 	tc[0] = (sqrt(tmp));
7874 	}
7875 	else
7876 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
7877 #ifdef RSB_NUMERICAL_TYPE_FLOAT
7878 	if( type == RSB_NUMERICAL_TYPE_FLOAT  )
7879 	{
7880 	const float*ta = a;float *tc = c,acc = ((float)(0)),tmp = ((float)(0));
7881 	{
7882 for(i=0;i+15<n;i+=16){
7883 	acc = fabsf(ta[(i+0 )*incA]);tmp += acc*acc;
7884 		acc = fabsf(ta[(i+1 )*incA]);tmp += acc*acc;
7885 		acc = fabsf(ta[(i+2 )*incA]);tmp += acc*acc;
7886 		acc = fabsf(ta[(i+3 )*incA]);tmp += acc*acc;
7887 		acc = fabsf(ta[(i+4 )*incA]);tmp += acc*acc;
7888 		acc = fabsf(ta[(i+5 )*incA]);tmp += acc*acc;
7889 		acc = fabsf(ta[(i+6 )*incA]);tmp += acc*acc;
7890 		acc = fabsf(ta[(i+7 )*incA]);tmp += acc*acc;
7891 		acc = fabsf(ta[(i+8 )*incA]);tmp += acc*acc;
7892 		acc = fabsf(ta[(i+9 )*incA]);tmp += acc*acc;
7893 		acc = fabsf(ta[(i+10 )*incA]);tmp += acc*acc;
7894 		acc = fabsf(ta[(i+11 )*incA]);tmp += acc*acc;
7895 		acc = fabsf(ta[(i+12 )*incA]);tmp += acc*acc;
7896 		acc = fabsf(ta[(i+13 )*incA]);tmp += acc*acc;
7897 		acc = fabsf(ta[(i+14 )*incA]);tmp += acc*acc;
7898 		acc = fabsf(ta[(i+15 )*incA]);tmp += acc*acc;
7899 	}
7900 for(     ;i<n;++i){ 	acc = fabsf(ta[(i+0 )*incA]);tmp += acc*acc;
7901 	 }
7902 }
7903 ;
7904 	tc[0] = (sqrtf(tmp));
7905 	}
7906 	else
7907 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
7908 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
7909 	if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
7910 	{
7911 	const float complex*ta = a;float *tc = c,acc = ((float complex)(0)),tmp = ((float complex)(0));
7912 	{
7913 for(i=0;i+15<n;i+=16){
7914 	acc = cabsf(ta[(i+0 )*incA]);tmp += acc*acc;
7915 		acc = cabsf(ta[(i+1 )*incA]);tmp += acc*acc;
7916 		acc = cabsf(ta[(i+2 )*incA]);tmp += acc*acc;
7917 		acc = cabsf(ta[(i+3 )*incA]);tmp += acc*acc;
7918 		acc = cabsf(ta[(i+4 )*incA]);tmp += acc*acc;
7919 		acc = cabsf(ta[(i+5 )*incA]);tmp += acc*acc;
7920 		acc = cabsf(ta[(i+6 )*incA]);tmp += acc*acc;
7921 		acc = cabsf(ta[(i+7 )*incA]);tmp += acc*acc;
7922 		acc = cabsf(ta[(i+8 )*incA]);tmp += acc*acc;
7923 		acc = cabsf(ta[(i+9 )*incA]);tmp += acc*acc;
7924 		acc = cabsf(ta[(i+10 )*incA]);tmp += acc*acc;
7925 		acc = cabsf(ta[(i+11 )*incA]);tmp += acc*acc;
7926 		acc = cabsf(ta[(i+12 )*incA]);tmp += acc*acc;
7927 		acc = cabsf(ta[(i+13 )*incA]);tmp += acc*acc;
7928 		acc = cabsf(ta[(i+14 )*incA]);tmp += acc*acc;
7929 		acc = cabsf(ta[(i+15 )*incA]);tmp += acc*acc;
7930 	}
7931 for(     ;i<n;++i){ 	acc = cabsf(ta[(i+0 )*incA]);tmp += acc*acc;
7932 	 }
7933 }
7934 ;
7935 	tc[0] = crealf(csqrtf(tmp));
7936 	}
7937 	else
7938 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
7939 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
7940 	if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
7941 	{
7942 	const double complex*ta = a;double *tc = c,acc = ((double complex)(0)),tmp = ((double complex)(0));
7943 	{
7944 for(i=0;i+15<n;i+=16){
7945 	acc = cabs(ta[(i+0 )*incA]);tmp += acc*acc;
7946 		acc = cabs(ta[(i+1 )*incA]);tmp += acc*acc;
7947 		acc = cabs(ta[(i+2 )*incA]);tmp += acc*acc;
7948 		acc = cabs(ta[(i+3 )*incA]);tmp += acc*acc;
7949 		acc = cabs(ta[(i+4 )*incA]);tmp += acc*acc;
7950 		acc = cabs(ta[(i+5 )*incA]);tmp += acc*acc;
7951 		acc = cabs(ta[(i+6 )*incA]);tmp += acc*acc;
7952 		acc = cabs(ta[(i+7 )*incA]);tmp += acc*acc;
7953 		acc = cabs(ta[(i+8 )*incA]);tmp += acc*acc;
7954 		acc = cabs(ta[(i+9 )*incA]);tmp += acc*acc;
7955 		acc = cabs(ta[(i+10 )*incA]);tmp += acc*acc;
7956 		acc = cabs(ta[(i+11 )*incA]);tmp += acc*acc;
7957 		acc = cabs(ta[(i+12 )*incA]);tmp += acc*acc;
7958 		acc = cabs(ta[(i+13 )*incA]);tmp += acc*acc;
7959 		acc = cabs(ta[(i+14 )*incA]);tmp += acc*acc;
7960 		acc = cabs(ta[(i+15 )*incA]);tmp += acc*acc;
7961 	}
7962 for(     ;i<n;++i){ 	acc = cabs(ta[(i+0 )*incA]);tmp += acc*acc;
7963 	 }
7964 }
7965 ;
7966 	tc[0] = creal(csqrt(tmp));
7967 	}
7968 	else
7969 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
7970 	return RSB_ERR_UNSUPPORTED_TYPE	;
7971 	return RSB_ERR_NO_ERROR;
7972 }
7973 
rsb__cblas_Xdotu_sub(rsb_type_t type,size_t n,const void * x,rsb_nnz_idx_t incx,const void * y,rsb_nnz_idx_t incy,void * dotu)7974 rsb_err_t rsb__cblas_Xdotu_sub(rsb_type_t type, size_t n, const void * x, rsb_nnz_idx_t incx, const void * y, rsb_nnz_idx_t incy, void *dotu){
7975 	/*!
7976 	 * */
7977 	return rsb__vector_mult_sum(x,y,dotu,type,n,incx,incy);
7978 }
7979 
rsb__cblas_Xscal(rsb_type_t type,size_t n,const void * alphap,void * a,size_t stride)7980 rsb_err_t rsb__cblas_Xscal(rsb_type_t type, size_t n, const void * alphap, void * a, size_t stride){
7981 	/*!
7982 	 * a <- a * alpha
7983 	 * */
7984 	return rsb_strided_vector_scale(a,alphap,type,n,stride);
7985 }
7986 
rsb__coo_insertion_sort(rsb_type_t typecode,void * VB,rsb_coo_idx_t * IB,rsb_coo_idx_t * JB,rsb_nnz_idx_t offA,rsb_nnz_idx_t nnzA)7987 rsb_err_t rsb__coo_insertion_sort(rsb_type_t typecode, void* VB, rsb_coo_idx_t * IB, rsb_coo_idx_t * JB, rsb_nnz_idx_t offA, rsb_nnz_idx_t nnzA)
7988 {
7989 	/* only for *small* arrays, where allocation of a temporary array is not justified */
7990 	rsb_coo_idx_t * IA = NULL, *JA = NULL;
7991 	rsb_nnz_idx_t i, j;
7992 
7993 	IA = IB + offA;
7994 	JA = JB + offA;
7995 
7996 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
7997 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
7998 	{
7999 		double * VA = (double*) RSB_TYPED_OFF_PTR(typecode,VB,offA);
8000 		for(i=1;i<nnzA;++i)
8001 		for(j=i;j>0 && RSB_COO_LT(IA[j],JA[j],IA[j-1],JA[j-1]);--j)
8002 		{
8003 			RSB_SWAP(rsb_coo_idx_t,IA[j],IA[j-1]);
8004 			RSB_SWAP(rsb_coo_idx_t,JA[j],JA[j-1]);
8005 			RSB_SWAP(double        ,VA[j],VA[j-1]);
8006 		}
8007 	}
8008 	else
8009 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
8010 #ifdef RSB_NUMERICAL_TYPE_FLOAT
8011 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
8012 	{
8013 		float * VA = (float*) RSB_TYPED_OFF_PTR(typecode,VB,offA);
8014 		for(i=1;i<nnzA;++i)
8015 		for(j=i;j>0 && RSB_COO_LT(IA[j],JA[j],IA[j-1],JA[j-1]);--j)
8016 		{
8017 			RSB_SWAP(rsb_coo_idx_t,IA[j],IA[j-1]);
8018 			RSB_SWAP(rsb_coo_idx_t,JA[j],JA[j-1]);
8019 			RSB_SWAP(float        ,VA[j],VA[j-1]);
8020 		}
8021 	}
8022 	else
8023 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
8024 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
8025 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
8026 	{
8027 		float complex * VA = (float complex*) RSB_TYPED_OFF_PTR(typecode,VB,offA);
8028 		for(i=1;i<nnzA;++i)
8029 		for(j=i;j>0 && RSB_COO_LT(IA[j],JA[j],IA[j-1],JA[j-1]);--j)
8030 		{
8031 			RSB_SWAP(rsb_coo_idx_t,IA[j],IA[j-1]);
8032 			RSB_SWAP(rsb_coo_idx_t,JA[j],JA[j-1]);
8033 			RSB_SWAP(float complex        ,VA[j],VA[j-1]);
8034 		}
8035 	}
8036 	else
8037 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
8038 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
8039 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
8040 	{
8041 		double complex * VA = (double complex*) RSB_TYPED_OFF_PTR(typecode,VB,offA);
8042 		for(i=1;i<nnzA;++i)
8043 		for(j=i;j>0 && RSB_COO_LT(IA[j],JA[j],IA[j-1],JA[j-1]);--j)
8044 		{
8045 			RSB_SWAP(rsb_coo_idx_t,IA[j],IA[j-1]);
8046 			RSB_SWAP(rsb_coo_idx_t,JA[j],JA[j-1]);
8047 			RSB_SWAP(double complex        ,VA[j],VA[j-1]);
8048 		}
8049 	}
8050 	else
8051 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
8052 	return RSB_ERR_UNSUPPORTED_TYPE	;
8053 	return RSB_ERR_NO_ERROR;
8054 }
8055 
rsb__coo_to_lr(void * RSB_RESTRICT VBu,rsb_coo_idx_t * RSB_RESTRICT IB,rsb_coo_idx_t * RSB_RESTRICT JB,void * RSB_RESTRICT VAu,rsb_coo_idx_t * RSB_RESTRICT IA,rsb_coo_idx_t * RSB_RESTRICT JA,rsb_coo_idx_t mj,rsb_nnz_idx_t nnzA,rsb_nnz_idx_t nzoffB,rsb_nnz_idx_t nzoffA,rsb_nnz_idx_t * RSB_RESTRICT nzlp,rsb_nnz_idx_t * RSB_RESTRICT nzrp,rsb_coo_idx_t iadd,rsb_coo_idx_t jadd,rsb_type_t typecode)8056 void rsb__coo_to_lr( void * RSB_RESTRICT VBu, rsb_coo_idx_t*RSB_RESTRICT IB, rsb_coo_idx_t*RSB_RESTRICT JB, void * RSB_RESTRICT VAu, rsb_coo_idx_t*RSB_RESTRICT IA, rsb_coo_idx_t*RSB_RESTRICT JA, rsb_coo_idx_t mj, rsb_nnz_idx_t nnzA, rsb_nnz_idx_t nzoffB, rsb_nnz_idx_t nzoffA, rsb_nnz_idx_t*RSB_RESTRICT nzlp, rsb_nnz_idx_t*RSB_RESTRICT nzrp, rsb_coo_idx_t iadd, rsb_coo_idx_t jadd, rsb_type_t typecode)
8057 {
8058 	/*
8059 	 * Given COO arrays matrices A an (temporary) B, stores the coefficients left of the mj-th column before the one coming after it, respecting the row major ordering.
8060 	 * A serial function.
8061 	 * */
8062 	rsb_nnz_idx_t nzl = 0, nzr = 0, nzi = 0;
8063 
8064 	RSB_DEBUG_ASSERT(IA!=IB);
8065 	RSB_DEBUG_ASSERT(JA!=JB);
8066 	RSB_DEBUG_ASSERT(mtxAp);
8067 
8068 	IA += nzoffA;
8069 	JA += nzoffA;
8070 
8071 	IB += nzoffB;
8072 	JB += nzoffB;
8073 
8074 switch(typecode)
8075 {
8076 			/* supported (double,float,float complex,double complex) */
8077 case RSB_NUMERICAL_TYPE_DOUBLE 	:
8078 {
8079 	double * VA = VAu;
8080 	double * VB = VBu;
8081 	RSB_DEBUG_ASSERT(VA!=VB);
8082 
8083 	VA += nzoffA;
8084 	VB += nzoffB;
8085 
8086 	for(nzi=0;nzi<nnzA;++nzi)
8087 	{
8088 		if( JA[nzi] < mj )
8089 		{
8090 			IB[nzl] = IA[nzi] + iadd;
8091 			JB[nzl] = JA[nzi] ;
8092 			VB[nzl] = VA[nzi];
8093 			nzl++;
8094 		}
8095 		else
8096 		{
8097 			nzr++;
8098 			IB[nnzA-nzr] = IA[nzi] + iadd;
8099 			JB[nnzA-nzr] = JA[nzi] + jadd;
8100 			VB[nnzA-nzr] = VA[nzi];
8101 		}
8102 	}
8103 
8104 	/* copy left quadrant back to A */
8105 	for(nzi=0;nzi<nzl ;++nzi)
8106 	{
8107 		IA[nzi] = IB[nzi];
8108 		JA[nzi] = JB[nzi];
8109 		VA[nzi] = VB[nzi];
8110 	}
8111 
8112 	/* copy right quadrant back to A */
8113 	for(     ;nzi<nnzA;++nzi)
8114 	{
8115 		IA[nzi] = IB[nnzA-(1+nzi-nzl)];
8116 		JA[nzi] = JB[nnzA-(1+nzi-nzl)];
8117 		VA[nzi] = VB[nnzA-(1+nzi-nzl)];
8118 	}
8119 }
8120 	break;
8121 case RSB_NUMERICAL_TYPE_FLOAT 	:
8122 {
8123 	float * VA = VAu;
8124 	float * VB = VBu;
8125 	RSB_DEBUG_ASSERT(VA!=VB);
8126 
8127 	VA += nzoffA;
8128 	VB += nzoffB;
8129 
8130 	for(nzi=0;nzi<nnzA;++nzi)
8131 	{
8132 		if( JA[nzi] < mj )
8133 		{
8134 			IB[nzl] = IA[nzi] + iadd;
8135 			JB[nzl] = JA[nzi] ;
8136 			VB[nzl] = VA[nzi];
8137 			nzl++;
8138 		}
8139 		else
8140 		{
8141 			nzr++;
8142 			IB[nnzA-nzr] = IA[nzi] + iadd;
8143 			JB[nnzA-nzr] = JA[nzi] + jadd;
8144 			VB[nnzA-nzr] = VA[nzi];
8145 		}
8146 	}
8147 
8148 	/* copy left quadrant back to A */
8149 	for(nzi=0;nzi<nzl ;++nzi)
8150 	{
8151 		IA[nzi] = IB[nzi];
8152 		JA[nzi] = JB[nzi];
8153 		VA[nzi] = VB[nzi];
8154 	}
8155 
8156 	/* copy right quadrant back to A */
8157 	for(     ;nzi<nnzA;++nzi)
8158 	{
8159 		IA[nzi] = IB[nnzA-(1+nzi-nzl)];
8160 		JA[nzi] = JB[nnzA-(1+nzi-nzl)];
8161 		VA[nzi] = VB[nnzA-(1+nzi-nzl)];
8162 	}
8163 }
8164 	break;
8165 case RSB_NUMERICAL_TYPE_FLOAT_COMPLEX 	:
8166 {
8167 	float complex * VA = VAu;
8168 	float complex * VB = VBu;
8169 	RSB_DEBUG_ASSERT(VA!=VB);
8170 
8171 	VA += nzoffA;
8172 	VB += nzoffB;
8173 
8174 	for(nzi=0;nzi<nnzA;++nzi)
8175 	{
8176 		if( JA[nzi] < mj )
8177 		{
8178 			IB[nzl] = IA[nzi] + iadd;
8179 			JB[nzl] = JA[nzi] ;
8180 			VB[nzl] = VA[nzi];
8181 			nzl++;
8182 		}
8183 		else
8184 		{
8185 			nzr++;
8186 			IB[nnzA-nzr] = IA[nzi] + iadd;
8187 			JB[nnzA-nzr] = JA[nzi] + jadd;
8188 			VB[nnzA-nzr] = VA[nzi];
8189 		}
8190 	}
8191 
8192 	/* copy left quadrant back to A */
8193 	for(nzi=0;nzi<nzl ;++nzi)
8194 	{
8195 		IA[nzi] = IB[nzi];
8196 		JA[nzi] = JB[nzi];
8197 		VA[nzi] = VB[nzi];
8198 	}
8199 
8200 	/* copy right quadrant back to A */
8201 	for(     ;nzi<nnzA;++nzi)
8202 	{
8203 		IA[nzi] = IB[nnzA-(1+nzi-nzl)];
8204 		JA[nzi] = JB[nnzA-(1+nzi-nzl)];
8205 		VA[nzi] = VB[nnzA-(1+nzi-nzl)];
8206 	}
8207 }
8208 	break;
8209 case RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX 	:
8210 {
8211 	double complex * VA = VAu;
8212 	double complex * VB = VBu;
8213 	RSB_DEBUG_ASSERT(VA!=VB);
8214 
8215 	VA += nzoffA;
8216 	VB += nzoffB;
8217 
8218 	for(nzi=0;nzi<nnzA;++nzi)
8219 	{
8220 		if( JA[nzi] < mj )
8221 		{
8222 			IB[nzl] = IA[nzi] + iadd;
8223 			JB[nzl] = JA[nzi] ;
8224 			VB[nzl] = VA[nzi];
8225 			nzl++;
8226 		}
8227 		else
8228 		{
8229 			nzr++;
8230 			IB[nnzA-nzr] = IA[nzi] + iadd;
8231 			JB[nnzA-nzr] = JA[nzi] + jadd;
8232 			VB[nnzA-nzr] = VA[nzi];
8233 		}
8234 	}
8235 
8236 	/* copy left quadrant back to A */
8237 	for(nzi=0;nzi<nzl ;++nzi)
8238 	{
8239 		IA[nzi] = IB[nzi];
8240 		JA[nzi] = JB[nzi];
8241 		VA[nzi] = VB[nzi];
8242 	}
8243 
8244 	/* copy right quadrant back to A */
8245 	for(     ;nzi<nnzA;++nzi)
8246 	{
8247 		IA[nzi] = IB[nnzA-(1+nzi-nzl)];
8248 		JA[nzi] = JB[nnzA-(1+nzi-nzl)];
8249 		VA[nzi] = VB[nnzA-(1+nzi-nzl)];
8250 	}
8251 }
8252 	break;
8253 	/* unsupported type */
8254 	default :
8255 	RSB_NULL_STATEMENT_FOR_COMPILER_HAPPINESS
8256 }
8257 
8258 	*nzlp = nzl;
8259 	*nzrp = nzr;
8260 }
8261 #ifdef __cplusplus
8262 }
8263 #endif  /* __cplusplus */
8264 
8265 /* @endcond */
8266