1 /* @cond INNERDOC */
2
3 /**
4 * @file
5 * @brief
6 * Auxiliary functions.
7 */
8
9 /*
10
11 Copyright (C) 2008-2020 Michele Martone
12
13 This file is part of librsb.
14
15 librsb is free software; you can redistribute it and/or modify it
16 under the terms of the GNU Lesser General Public License as published
17 by the Free Software Foundation; either version 3 of the License, or
18 (at your option) any later version.
19
20 librsb is distributed in the hope that it will be useful, but WITHOUT
21 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
23 License for more details.
24
25 You should have received a copy of the GNU Lesser General Public
26 License along with librsb; see the file COPYING.
27 If not, see <http://www.gnu.org/licenses/>.
28
29 */
30 /*
31 The code in this file was generated automatically by an M4 script.
32 It is not meant to be used as an API (Application Programming Interface).
33 p.s.: right now, only row major matrix access is considered.
34
35 */
36
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif /* __cplusplus */
41
42 #define RSB_WANT_OMP 1
43 #define RSB_MAX_OMP_THREADS 4
44 #include <omp.h> /* OpenMP parallelism (EXPERIMENTAL) */
45
46
47 #include "rsb_common.h"
48 /* non blas-like functions */
49
rsb__util_m4_sanity_check(void)50 rsb_err_t rsb__util_m4_sanity_check(void){
51 /**
52 There are bugs in the m4 macros or a bad m4 implementation which will trigger this test to fail.
53 We are interested in catching them, as we should rely on a sane m4 environment.
54 */
55 /* generated by the $0 macro */
56
57 if(
58 0!=0 ||
59 1!=1 ||
60 1!=1 ||
61 0!=0 ||
62 0!=0 ||
63 0!=0 ||
64 0!=0 ||
65 1!=1 ||
66 0!=0 ||
67 1!=1 ||
68 1!=1 ||
69 1!=1 ||
70 0
71 )
72 goto err;
73 return RSB_ERR_NO_ERROR;
74 err:
75 return RSB_ERR_INTERNAL_ERROR;
76 }
77
rsb__util_increase_by_one(void * p,rsb_nnz_idx_t n,rsb_flags_t typecode)78 const void * rsb__util_increase_by_one(void *p, rsb_nnz_idx_t n, rsb_flags_t typecode){
79 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
80 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE ) {(((double*)p)[n])+=1;return p;}
81 else
82 #endif
83 #ifdef RSB_NUMERICAL_TYPE_FLOAT
84 if( typecode == RSB_NUMERICAL_TYPE_FLOAT ) {(((float*)p)[n])+=1;return p;}
85 else
86 #endif
87 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
88 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ) {(((float complex*)p)[n])+=1;return p;}
89 else
90 #endif
91 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
92 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ) {(((double complex*)p)[n])+=1;return p;}
93 else
94 #endif
95 return NULL;
96 }
97
rsb__util_set_area_to_fraction_of_integer(void * p,const int alphai,rsb_flags_t typecode)98 void rsb__util_set_area_to_fraction_of_integer(void *p, const int alphai, rsb_flags_t typecode){
99 /*
100 alpha NULL will imply 1
101 */
102 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
103 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE ) {*(double*)p = 1;*(double*)p/=alphai;}
104 else
105 #endif
106 #ifdef RSB_NUMERICAL_TYPE_FLOAT
107 if( typecode == RSB_NUMERICAL_TYPE_FLOAT ) {*(float*)p = 1;*(float*)p/=alphai;}
108 else
109 #endif
110 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
111 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ) {*(float complex*)p = 1;*(float complex*)p/=alphai;}
112 else
113 #endif
114 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
115 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ) {*(double complex*)p = 1;*(double complex*)p/=alphai;}
116 else
117 #endif
118 return;
119 }
120
rsb__util_set_area_to_negated_fraction(void * p,const void * alpha,rsb_flags_t typecode)121 void rsb__util_set_area_to_negated_fraction(void *p, const void *alpha, rsb_flags_t typecode){
122 /*
123 alpha NULL will imply 1
124 */
125 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
126 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE ) {*(double*)p = -1;if(alpha)*(double*)p/=(*(double*)alpha);}
127 else
128 #endif
129 #ifdef RSB_NUMERICAL_TYPE_FLOAT
130 if( typecode == RSB_NUMERICAL_TYPE_FLOAT ) {*(float*)p = -1;if(alpha)*(float*)p/=(*(float*)alpha);}
131 else
132 #endif
133 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
134 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ) {*(float complex*)p = -1;if(alpha)*(float complex*)p/=(*(float complex*)alpha);}
135 else
136 #endif
137 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
138 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ) {*(double complex*)p = -1;if(alpha)*(double complex*)p/=(*(double complex*)alpha);}
139 else
140 #endif
141 return;
142 }
143
rsb__util_set_area_to_converted_integer(void * p,rsb_flags_t typecode,const rsb_int n)144 void rsb__util_set_area_to_converted_integer(void *p, rsb_flags_t typecode, const rsb_int n){
145 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
146 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE ) {*(double*)p = (double)n;}
147 else
148 #endif
149 #ifdef RSB_NUMERICAL_TYPE_FLOAT
150 if( typecode == RSB_NUMERICAL_TYPE_FLOAT ) {*(float*)p = (float)n;}
151 else
152 #endif
153 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
154 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ) {*(float complex*)p = (float complex)n;}
155 else
156 #endif
157 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
158 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ) {*(double complex*)p = (double complex)n;}
159 else
160 #endif
161 return;
162 }
163
rsb__util_get_partitioning_array(size_t bs,size_t X,rsb_blk_idx_t * X_b,rsb_flags_t flags)164 rsb_coo_idx_t * rsb__util_get_partitioning_array( size_t bs, size_t X , rsb_blk_idx_t * X_b, rsb_flags_t flags){
165 /*!
166 * Given a block size (be it rows or columns), an element size X in bytes,
167 * and a dimension (rows or columns), returns an array containing the
168 * indices of the elements in each block.
169 *
170 * Therefore, the allocated arrays
171 *
172 * \param bs the block size
173 * \param X the rows or columns count
174 * \param X_b on output, the allocated array elements count : (X+bs-1)/bs
175 * \return NULL on error; a valid array pointer on success
176 *
177 * FIXME : why not size_t ? or maybe rsb_size_t ?
178 * */
179 size_t i;
180 rsb_err_t errval = RSB_ERR_NO_ERROR;
181 rsb_coo_idx_t * p_x = NULL;
182
183 *X_b = (X+bs-1)/bs;
184
185 /* WARNING : 1 is the extreme limit before overflow :) */
186 if( ( ((size_t)(*X_b)) < ((size_t)((X+bs-1)/bs))) || (RSB_BLK_ADD_OVERFLOW(*X_b,1)) )
187 {
188 /* overflow. should print some message. */
189 errval = RSB_ERR_LIMITS;goto err;
190 }
191
192 p_x = rsb__malloc(sizeof(rsb_coo_idx_t)*(*X_b+1));
193 if(! p_x) goto err;
194 /* note: should use some perrno some day */
195
196 /* note the last block size : it is the same, regardless congruences */
197 {
198 for(i=0;i+15<*X_b;i+=16){
199 p_x[i+0 ] = (i+0 )*bs;
200 p_x[i+1 ] = (i+1 )*bs;
201 p_x[i+2 ] = (i+2 )*bs;
202 p_x[i+3 ] = (i+3 )*bs;
203 p_x[i+4 ] = (i+4 )*bs;
204 p_x[i+5 ] = (i+5 )*bs;
205 p_x[i+6 ] = (i+6 )*bs;
206 p_x[i+7 ] = (i+7 )*bs;
207 p_x[i+8 ] = (i+8 )*bs;
208 p_x[i+9 ] = (i+9 )*bs;
209 p_x[i+10 ] = (i+10 )*bs;
210 p_x[i+11 ] = (i+11 )*bs;
211 p_x[i+12 ] = (i+12 )*bs;
212 p_x[i+13 ] = (i+13 )*bs;
213 p_x[i+14 ] = (i+14 )*bs;
214 p_x[i+15 ] = (i+15 )*bs;
215 }
216 for( ;i<*X_b;++i){ p_x[i+0 ] = (i+0 )*bs;
217 }
218 }
219
220
221 /* FIXME : this point should be remarked and documented way better ! */
222 if(flags&(RSB_FLAG_WANT_BCSS_STORAGE|RSB_FLAG_WANT_FIXED_BLOCKING_VBR))
223 p_x[*X_b] = *X_b*bs; /* the last element of p_x is the index of the last matrix row/column + 1 */
224 else
225 p_x[*X_b] = X; /* the last element of p_x is the index of the last matrix row/column + 1 */
226
227 return p_x;
228 err:
229 RSB_CONDITIONAL_FREE(p_x);
230 rsb__do_perror(NULL,errval);
231 return NULL;
232 }
233
rsb__vector_diff(void * c,const void * a,const void * b,rsb_type_t type,size_t n)234 rsb_err_t rsb__vector_diff(void * c, const void * a, const void * b, rsb_type_t type, size_t n){
235 /*!
236 * c <- a-b
237 *
238 * \param array an array pointer
239 * \param type a valid type code
240 * \param n the input array length
241 * \note see daxpy,dcopy in BLAS
242 *
243 * \return \rsberrcodemsg
244 * */
245 size_t i;
246 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
247 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
248 {
249 const double*ta = a,*tb = b;double *tc = c;
250 {
251 for(i=0;i+15<n;i+=16){
252 tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
253 tc[i+1 ] = ta[i+1 ]-tb[i+1 ];
254 tc[i+2 ] = ta[i+2 ]-tb[i+2 ];
255 tc[i+3 ] = ta[i+3 ]-tb[i+3 ];
256 tc[i+4 ] = ta[i+4 ]-tb[i+4 ];
257 tc[i+5 ] = ta[i+5 ]-tb[i+5 ];
258 tc[i+6 ] = ta[i+6 ]-tb[i+6 ];
259 tc[i+7 ] = ta[i+7 ]-tb[i+7 ];
260 tc[i+8 ] = ta[i+8 ]-tb[i+8 ];
261 tc[i+9 ] = ta[i+9 ]-tb[i+9 ];
262 tc[i+10 ] = ta[i+10 ]-tb[i+10 ];
263 tc[i+11 ] = ta[i+11 ]-tb[i+11 ];
264 tc[i+12 ] = ta[i+12 ]-tb[i+12 ];
265 tc[i+13 ] = ta[i+13 ]-tb[i+13 ];
266 tc[i+14 ] = ta[i+14 ]-tb[i+14 ];
267 tc[i+15 ] = ta[i+15 ]-tb[i+15 ];
268 }
269 for( ;i<n;++i){ tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
270 }
271 }
272 ;
273 }
274 else
275 #endif
276 #ifdef RSB_NUMERICAL_TYPE_FLOAT
277 if( type == RSB_NUMERICAL_TYPE_FLOAT )
278 {
279 const float*ta = a,*tb = b;float *tc = c;
280 {
281 for(i=0;i+15<n;i+=16){
282 tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
283 tc[i+1 ] = ta[i+1 ]-tb[i+1 ];
284 tc[i+2 ] = ta[i+2 ]-tb[i+2 ];
285 tc[i+3 ] = ta[i+3 ]-tb[i+3 ];
286 tc[i+4 ] = ta[i+4 ]-tb[i+4 ];
287 tc[i+5 ] = ta[i+5 ]-tb[i+5 ];
288 tc[i+6 ] = ta[i+6 ]-tb[i+6 ];
289 tc[i+7 ] = ta[i+7 ]-tb[i+7 ];
290 tc[i+8 ] = ta[i+8 ]-tb[i+8 ];
291 tc[i+9 ] = ta[i+9 ]-tb[i+9 ];
292 tc[i+10 ] = ta[i+10 ]-tb[i+10 ];
293 tc[i+11 ] = ta[i+11 ]-tb[i+11 ];
294 tc[i+12 ] = ta[i+12 ]-tb[i+12 ];
295 tc[i+13 ] = ta[i+13 ]-tb[i+13 ];
296 tc[i+14 ] = ta[i+14 ]-tb[i+14 ];
297 tc[i+15 ] = ta[i+15 ]-tb[i+15 ];
298 }
299 for( ;i<n;++i){ tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
300 }
301 }
302 ;
303 }
304 else
305 #endif
306 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
307 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
308 {
309 const float complex*ta = a,*tb = b;float complex *tc = c;
310 {
311 for(i=0;i+15<n;i+=16){
312 tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
313 tc[i+1 ] = ta[i+1 ]-tb[i+1 ];
314 tc[i+2 ] = ta[i+2 ]-tb[i+2 ];
315 tc[i+3 ] = ta[i+3 ]-tb[i+3 ];
316 tc[i+4 ] = ta[i+4 ]-tb[i+4 ];
317 tc[i+5 ] = ta[i+5 ]-tb[i+5 ];
318 tc[i+6 ] = ta[i+6 ]-tb[i+6 ];
319 tc[i+7 ] = ta[i+7 ]-tb[i+7 ];
320 tc[i+8 ] = ta[i+8 ]-tb[i+8 ];
321 tc[i+9 ] = ta[i+9 ]-tb[i+9 ];
322 tc[i+10 ] = ta[i+10 ]-tb[i+10 ];
323 tc[i+11 ] = ta[i+11 ]-tb[i+11 ];
324 tc[i+12 ] = ta[i+12 ]-tb[i+12 ];
325 tc[i+13 ] = ta[i+13 ]-tb[i+13 ];
326 tc[i+14 ] = ta[i+14 ]-tb[i+14 ];
327 tc[i+15 ] = ta[i+15 ]-tb[i+15 ];
328 }
329 for( ;i<n;++i){ tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
330 }
331 }
332 ;
333 }
334 else
335 #endif
336 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
337 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
338 {
339 const double complex*ta = a,*tb = b;double complex *tc = c;
340 {
341 for(i=0;i+15<n;i+=16){
342 tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
343 tc[i+1 ] = ta[i+1 ]-tb[i+1 ];
344 tc[i+2 ] = ta[i+2 ]-tb[i+2 ];
345 tc[i+3 ] = ta[i+3 ]-tb[i+3 ];
346 tc[i+4 ] = ta[i+4 ]-tb[i+4 ];
347 tc[i+5 ] = ta[i+5 ]-tb[i+5 ];
348 tc[i+6 ] = ta[i+6 ]-tb[i+6 ];
349 tc[i+7 ] = ta[i+7 ]-tb[i+7 ];
350 tc[i+8 ] = ta[i+8 ]-tb[i+8 ];
351 tc[i+9 ] = ta[i+9 ]-tb[i+9 ];
352 tc[i+10 ] = ta[i+10 ]-tb[i+10 ];
353 tc[i+11 ] = ta[i+11 ]-tb[i+11 ];
354 tc[i+12 ] = ta[i+12 ]-tb[i+12 ];
355 tc[i+13 ] = ta[i+13 ]-tb[i+13 ];
356 tc[i+14 ] = ta[i+14 ]-tb[i+14 ];
357 tc[i+15 ] = ta[i+15 ]-tb[i+15 ];
358 }
359 for( ;i<n;++i){ tc[i+0 ] = ta[i+0 ]-tb[i+0 ];
360 }
361 }
362 ;
363 }
364 else
365 #endif
366 return RSB_ERR_UNSUPPORTED_TYPE ;
367 return RSB_ERR_NO_ERROR;
368 }
369
rsb_vector_norm_square(void * c,const void * a,rsb_type_t type,size_t n)370 static rsb_err_t rsb_vector_norm_square(void * c, const void * a, rsb_type_t type, size_t n)
371 {
372 /*!
373 * c <- a^T*a
374 *
375 * \param a an array pointer
376 * \param type a valid type code
377 * \param n the input array length
378 * \note see ddot in BLAS
379 *
380 * \return \rsberrcodemsg
381 * */
382 size_t i;
383 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
384 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
385 {
386 const double*ta = a;double *tc = c;
387 tc[0] = ((double)(0));
388 {
389 for(i=0;i+15<n;i+=16){
390 tc[0]+=ta[i+0 ]*ta[i+0 ];
391 tc[0]+=ta[i+1 ]*ta[i+1 ];
392 tc[0]+=ta[i+2 ]*ta[i+2 ];
393 tc[0]+=ta[i+3 ]*ta[i+3 ];
394 tc[0]+=ta[i+4 ]*ta[i+4 ];
395 tc[0]+=ta[i+5 ]*ta[i+5 ];
396 tc[0]+=ta[i+6 ]*ta[i+6 ];
397 tc[0]+=ta[i+7 ]*ta[i+7 ];
398 tc[0]+=ta[i+8 ]*ta[i+8 ];
399 tc[0]+=ta[i+9 ]*ta[i+9 ];
400 tc[0]+=ta[i+10 ]*ta[i+10 ];
401 tc[0]+=ta[i+11 ]*ta[i+11 ];
402 tc[0]+=ta[i+12 ]*ta[i+12 ];
403 tc[0]+=ta[i+13 ]*ta[i+13 ];
404 tc[0]+=ta[i+14 ]*ta[i+14 ];
405 tc[0]+=ta[i+15 ]*ta[i+15 ];
406 }
407 for( ;i<n;++i){ tc[0]+=ta[i+0 ]*ta[i+0 ];
408 }
409 }
410 ;
411 }
412 else
413 #endif
414 #ifdef RSB_NUMERICAL_TYPE_FLOAT
415 if( type == RSB_NUMERICAL_TYPE_FLOAT )
416 {
417 const float*ta = a;float *tc = c;
418 tc[0] = ((float)(0));
419 {
420 for(i=0;i+15<n;i+=16){
421 tc[0]+=ta[i+0 ]*ta[i+0 ];
422 tc[0]+=ta[i+1 ]*ta[i+1 ];
423 tc[0]+=ta[i+2 ]*ta[i+2 ];
424 tc[0]+=ta[i+3 ]*ta[i+3 ];
425 tc[0]+=ta[i+4 ]*ta[i+4 ];
426 tc[0]+=ta[i+5 ]*ta[i+5 ];
427 tc[0]+=ta[i+6 ]*ta[i+6 ];
428 tc[0]+=ta[i+7 ]*ta[i+7 ];
429 tc[0]+=ta[i+8 ]*ta[i+8 ];
430 tc[0]+=ta[i+9 ]*ta[i+9 ];
431 tc[0]+=ta[i+10 ]*ta[i+10 ];
432 tc[0]+=ta[i+11 ]*ta[i+11 ];
433 tc[0]+=ta[i+12 ]*ta[i+12 ];
434 tc[0]+=ta[i+13 ]*ta[i+13 ];
435 tc[0]+=ta[i+14 ]*ta[i+14 ];
436 tc[0]+=ta[i+15 ]*ta[i+15 ];
437 }
438 for( ;i<n;++i){ tc[0]+=ta[i+0 ]*ta[i+0 ];
439 }
440 }
441 ;
442 }
443 else
444 #endif
445 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
446 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
447 {
448 const float complex*ta = a;float complex *tc = c;
449 tc[0] = ((float complex)(0));
450 {
451 for(i=0;i+15<n;i+=16){
452 tc[0]+=ta[i+0 ]*ta[i+0 ];
453 tc[0]+=ta[i+1 ]*ta[i+1 ];
454 tc[0]+=ta[i+2 ]*ta[i+2 ];
455 tc[0]+=ta[i+3 ]*ta[i+3 ];
456 tc[0]+=ta[i+4 ]*ta[i+4 ];
457 tc[0]+=ta[i+5 ]*ta[i+5 ];
458 tc[0]+=ta[i+6 ]*ta[i+6 ];
459 tc[0]+=ta[i+7 ]*ta[i+7 ];
460 tc[0]+=ta[i+8 ]*ta[i+8 ];
461 tc[0]+=ta[i+9 ]*ta[i+9 ];
462 tc[0]+=ta[i+10 ]*ta[i+10 ];
463 tc[0]+=ta[i+11 ]*ta[i+11 ];
464 tc[0]+=ta[i+12 ]*ta[i+12 ];
465 tc[0]+=ta[i+13 ]*ta[i+13 ];
466 tc[0]+=ta[i+14 ]*ta[i+14 ];
467 tc[0]+=ta[i+15 ]*ta[i+15 ];
468 }
469 for( ;i<n;++i){ tc[0]+=ta[i+0 ]*ta[i+0 ];
470 }
471 }
472 ;
473 }
474 else
475 #endif
476 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
477 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
478 {
479 const double complex*ta = a;double complex *tc = c;
480 tc[0] = ((double complex)(0));
481 {
482 for(i=0;i+15<n;i+=16){
483 tc[0]+=ta[i+0 ]*ta[i+0 ];
484 tc[0]+=ta[i+1 ]*ta[i+1 ];
485 tc[0]+=ta[i+2 ]*ta[i+2 ];
486 tc[0]+=ta[i+3 ]*ta[i+3 ];
487 tc[0]+=ta[i+4 ]*ta[i+4 ];
488 tc[0]+=ta[i+5 ]*ta[i+5 ];
489 tc[0]+=ta[i+6 ]*ta[i+6 ];
490 tc[0]+=ta[i+7 ]*ta[i+7 ];
491 tc[0]+=ta[i+8 ]*ta[i+8 ];
492 tc[0]+=ta[i+9 ]*ta[i+9 ];
493 tc[0]+=ta[i+10 ]*ta[i+10 ];
494 tc[0]+=ta[i+11 ]*ta[i+11 ];
495 tc[0]+=ta[i+12 ]*ta[i+12 ];
496 tc[0]+=ta[i+13 ]*ta[i+13 ];
497 tc[0]+=ta[i+14 ]*ta[i+14 ];
498 tc[0]+=ta[i+15 ]*ta[i+15 ];
499 }
500 for( ;i<n;++i){ tc[0]+=ta[i+0 ]*ta[i+0 ];
501 }
502 }
503 ;
504 }
505 else
506 #endif
507 return RSB_ERR_UNSUPPORTED_TYPE ;
508 return RSB_ERR_NO_ERROR;
509 }
510
rsb_vector_norm(void * c,const void * a,rsb_type_t type,size_t n)511 static rsb_err_t rsb_vector_norm(void * c, const void * a, rsb_type_t type, size_t n)
512 {
513 /*!
514 * c <- sqrt(a^T*a)
515 *
516 * \param array an array pointer
517 * \param type a valid type code
518 * \param n the input array length
519 * \note see ddot in BLAS
520 *
521 * \return \rsberrcodemsg
522 * */
523 rsb_err_t errval;
524 if(!c)
525 return RSB_ERR_BADARGS;
526 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
527 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
528 {
529 double*cp = (double*)c;
530 errval = rsb_vector_norm_square(cp,a,type,n);
531 *cp = sqrt(*cp);
532 }
533 else
534 #endif
535 #ifdef RSB_NUMERICAL_TYPE_FLOAT
536 if( type == RSB_NUMERICAL_TYPE_FLOAT )
537 {
538 float*cp = (float*)c;
539 errval = rsb_vector_norm_square(cp,a,type,n);
540 *cp = sqrtf(*cp);
541 }
542 else
543 #endif
544 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
545 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
546 {
547 float complex*cp = (float complex*)c;
548 errval = rsb_vector_norm_square(cp,a,type,n);
549 *cp = csqrtf(*cp);
550 }
551 else
552 #endif
553 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
554 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
555 {
556 double complex*cp = (double complex*)c;
557 errval = rsb_vector_norm_square(cp,a,type,n);
558 *cp = csqrt(*cp);
559 }
560 else
561 #endif
562 errval = RSB_ERR_UNSUPPORTED_TYPE;
563 RSB_DO_ERR_RETURN(errval)
564 }
565
rsb_vector_norm_square_strided(void * c,const void * a,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)566 static rsb_err_t rsb_vector_norm_square_strided(void * c, const void * a, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
567 {
568 /*!
569 * c <- a^T*a
570 *
571 * \param array an array pointer
572 * \param type a valid type code
573 * \param n the input array length
574 * \note see ddot in BLAS
575 *
576 * \return \rsberrcodemsg
577 * */
578 size_t i;
579 if(inc==1)
580 return rsb_vector_norm_square(c,a,type,n);
581 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
582 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
583 {
584 const double*ta = a;double *tc = c;
585 tc[0] = ((double)(0));
586 {
587 for(i=0;i+15<n;i+=16){
588 tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
589 tc[0]+=ta[(i+1 )*inc]*ta[(i+1 )*inc];
590 tc[0]+=ta[(i+2 )*inc]*ta[(i+2 )*inc];
591 tc[0]+=ta[(i+3 )*inc]*ta[(i+3 )*inc];
592 tc[0]+=ta[(i+4 )*inc]*ta[(i+4 )*inc];
593 tc[0]+=ta[(i+5 )*inc]*ta[(i+5 )*inc];
594 tc[0]+=ta[(i+6 )*inc]*ta[(i+6 )*inc];
595 tc[0]+=ta[(i+7 )*inc]*ta[(i+7 )*inc];
596 tc[0]+=ta[(i+8 )*inc]*ta[(i+8 )*inc];
597 tc[0]+=ta[(i+9 )*inc]*ta[(i+9 )*inc];
598 tc[0]+=ta[(i+10 )*inc]*ta[(i+10 )*inc];
599 tc[0]+=ta[(i+11 )*inc]*ta[(i+11 )*inc];
600 tc[0]+=ta[(i+12 )*inc]*ta[(i+12 )*inc];
601 tc[0]+=ta[(i+13 )*inc]*ta[(i+13 )*inc];
602 tc[0]+=ta[(i+14 )*inc]*ta[(i+14 )*inc];
603 tc[0]+=ta[(i+15 )*inc]*ta[(i+15 )*inc];
604 }
605 for( ;i<n;++i){ tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
606 }
607 }
608 ;
609 }
610 else
611 #endif
612 #ifdef RSB_NUMERICAL_TYPE_FLOAT
613 if( type == RSB_NUMERICAL_TYPE_FLOAT )
614 {
615 const float*ta = a;float *tc = c;
616 tc[0] = ((float)(0));
617 {
618 for(i=0;i+15<n;i+=16){
619 tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
620 tc[0]+=ta[(i+1 )*inc]*ta[(i+1 )*inc];
621 tc[0]+=ta[(i+2 )*inc]*ta[(i+2 )*inc];
622 tc[0]+=ta[(i+3 )*inc]*ta[(i+3 )*inc];
623 tc[0]+=ta[(i+4 )*inc]*ta[(i+4 )*inc];
624 tc[0]+=ta[(i+5 )*inc]*ta[(i+5 )*inc];
625 tc[0]+=ta[(i+6 )*inc]*ta[(i+6 )*inc];
626 tc[0]+=ta[(i+7 )*inc]*ta[(i+7 )*inc];
627 tc[0]+=ta[(i+8 )*inc]*ta[(i+8 )*inc];
628 tc[0]+=ta[(i+9 )*inc]*ta[(i+9 )*inc];
629 tc[0]+=ta[(i+10 )*inc]*ta[(i+10 )*inc];
630 tc[0]+=ta[(i+11 )*inc]*ta[(i+11 )*inc];
631 tc[0]+=ta[(i+12 )*inc]*ta[(i+12 )*inc];
632 tc[0]+=ta[(i+13 )*inc]*ta[(i+13 )*inc];
633 tc[0]+=ta[(i+14 )*inc]*ta[(i+14 )*inc];
634 tc[0]+=ta[(i+15 )*inc]*ta[(i+15 )*inc];
635 }
636 for( ;i<n;++i){ tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
637 }
638 }
639 ;
640 }
641 else
642 #endif
643 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
644 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
645 {
646 const float complex*ta = a;float complex *tc = c;
647 tc[0] = ((float complex)(0));
648 {
649 for(i=0;i+15<n;i+=16){
650 tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
651 tc[0]+=ta[(i+1 )*inc]*ta[(i+1 )*inc];
652 tc[0]+=ta[(i+2 )*inc]*ta[(i+2 )*inc];
653 tc[0]+=ta[(i+3 )*inc]*ta[(i+3 )*inc];
654 tc[0]+=ta[(i+4 )*inc]*ta[(i+4 )*inc];
655 tc[0]+=ta[(i+5 )*inc]*ta[(i+5 )*inc];
656 tc[0]+=ta[(i+6 )*inc]*ta[(i+6 )*inc];
657 tc[0]+=ta[(i+7 )*inc]*ta[(i+7 )*inc];
658 tc[0]+=ta[(i+8 )*inc]*ta[(i+8 )*inc];
659 tc[0]+=ta[(i+9 )*inc]*ta[(i+9 )*inc];
660 tc[0]+=ta[(i+10 )*inc]*ta[(i+10 )*inc];
661 tc[0]+=ta[(i+11 )*inc]*ta[(i+11 )*inc];
662 tc[0]+=ta[(i+12 )*inc]*ta[(i+12 )*inc];
663 tc[0]+=ta[(i+13 )*inc]*ta[(i+13 )*inc];
664 tc[0]+=ta[(i+14 )*inc]*ta[(i+14 )*inc];
665 tc[0]+=ta[(i+15 )*inc]*ta[(i+15 )*inc];
666 }
667 for( ;i<n;++i){ tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
668 }
669 }
670 ;
671 }
672 else
673 #endif
674 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
675 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
676 {
677 const double complex*ta = a;double complex *tc = c;
678 tc[0] = ((double complex)(0));
679 {
680 for(i=0;i+15<n;i+=16){
681 tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
682 tc[0]+=ta[(i+1 )*inc]*ta[(i+1 )*inc];
683 tc[0]+=ta[(i+2 )*inc]*ta[(i+2 )*inc];
684 tc[0]+=ta[(i+3 )*inc]*ta[(i+3 )*inc];
685 tc[0]+=ta[(i+4 )*inc]*ta[(i+4 )*inc];
686 tc[0]+=ta[(i+5 )*inc]*ta[(i+5 )*inc];
687 tc[0]+=ta[(i+6 )*inc]*ta[(i+6 )*inc];
688 tc[0]+=ta[(i+7 )*inc]*ta[(i+7 )*inc];
689 tc[0]+=ta[(i+8 )*inc]*ta[(i+8 )*inc];
690 tc[0]+=ta[(i+9 )*inc]*ta[(i+9 )*inc];
691 tc[0]+=ta[(i+10 )*inc]*ta[(i+10 )*inc];
692 tc[0]+=ta[(i+11 )*inc]*ta[(i+11 )*inc];
693 tc[0]+=ta[(i+12 )*inc]*ta[(i+12 )*inc];
694 tc[0]+=ta[(i+13 )*inc]*ta[(i+13 )*inc];
695 tc[0]+=ta[(i+14 )*inc]*ta[(i+14 )*inc];
696 tc[0]+=ta[(i+15 )*inc]*ta[(i+15 )*inc];
697 }
698 for( ;i<n;++i){ tc[0]+=ta[(i+0 )*inc]*ta[(i+0 )*inc];
699 }
700 }
701 ;
702 }
703 else
704 #endif
705 return RSB_ERR_UNSUPPORTED_TYPE ;
706 return RSB_ERR_NO_ERROR;
707 }
708
rsb__vector_norm_strided(void * c,const void * a,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)709 rsb_err_t rsb__vector_norm_strided(void * c, const void * a, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
710 {
711 /*!
712 * c <- sqrt(a^T*a)
713 *
714 * \param array an array pointer
715 * \param type a valid type code
716 * \param n the input array length
717 * \note see ddot in BLAS
718 *
719 * \return \rsberrcodemsg
720 * */
721 rsb_err_t errval;
722 if(!c)
723 return RSB_ERR_BADARGS;
724 if(inc==1)
725 return rsb_vector_norm(c,a,type,n);
726 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
727 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
728 {
729 double*cp = (double*)c;
730 errval = rsb_vector_norm_square_strided(cp,a,type,n,inc);
731 *cp = sqrt(*cp);
732 }
733 else
734 #endif
735 #ifdef RSB_NUMERICAL_TYPE_FLOAT
736 if( type == RSB_NUMERICAL_TYPE_FLOAT )
737 {
738 float*cp = (float*)c;
739 errval = rsb_vector_norm_square_strided(cp,a,type,n,inc);
740 *cp = sqrtf(*cp);
741 }
742 else
743 #endif
744 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
745 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
746 {
747 float complex*cp = (float complex*)c;
748 errval = rsb_vector_norm_square_strided(cp,a,type,n,inc);
749 *cp = csqrtf(*cp);
750 }
751 else
752 #endif
753 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
754 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
755 {
756 double complex*cp = (double complex*)c;
757 errval = rsb_vector_norm_square_strided(cp,a,type,n,inc);
758 *cp = csqrt(*cp);
759 }
760 else
761 #endif
762 errval = RSB_ERR_UNSUPPORTED_TYPE;
763 RSB_DO_ERR_RETURN(errval)
764 }
765
rsb__util_vector_sum_strided(void * c,const void * a,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)766 rsb_err_t rsb__util_vector_sum_strided(void * c, const void * a, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
767 {
768 /*!
769 * c <- sum(a)
770 *
771 * \param array an array pointer
772 * \param type a valid type code
773 * \param n the input array length
774 * \note see ddot in BLAS
775 *
776 * \return \rsberrcodemsg
777 * */
778 size_t i;
779 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
780 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
781 {
782 register double acc = ((double)(0)); const double*ta = a; double*tc = c;
783 {
784 for(i=0;i+15<n;i+=16){
785 acc+=ta[(i+0 )*inc];
786 acc+=ta[(i+1 )*inc];
787 acc+=ta[(i+2 )*inc];
788 acc+=ta[(i+3 )*inc];
789 acc+=ta[(i+4 )*inc];
790 acc+=ta[(i+5 )*inc];
791 acc+=ta[(i+6 )*inc];
792 acc+=ta[(i+7 )*inc];
793 acc+=ta[(i+8 )*inc];
794 acc+=ta[(i+9 )*inc];
795 acc+=ta[(i+10 )*inc];
796 acc+=ta[(i+11 )*inc];
797 acc+=ta[(i+12 )*inc];
798 acc+=ta[(i+13 )*inc];
799 acc+=ta[(i+14 )*inc];
800 acc+=ta[(i+15 )*inc];
801 }
802 for( ;i<n;++i){ acc+=ta[(i+0 )*inc];
803 }
804 }
805 ;
806 tc[0] = acc;
807 }
808 else
809 #endif
810 #ifdef RSB_NUMERICAL_TYPE_FLOAT
811 if( type == RSB_NUMERICAL_TYPE_FLOAT )
812 {
813 register float acc = ((float)(0)); const float*ta = a; float*tc = c;
814 {
815 for(i=0;i+15<n;i+=16){
816 acc+=ta[(i+0 )*inc];
817 acc+=ta[(i+1 )*inc];
818 acc+=ta[(i+2 )*inc];
819 acc+=ta[(i+3 )*inc];
820 acc+=ta[(i+4 )*inc];
821 acc+=ta[(i+5 )*inc];
822 acc+=ta[(i+6 )*inc];
823 acc+=ta[(i+7 )*inc];
824 acc+=ta[(i+8 )*inc];
825 acc+=ta[(i+9 )*inc];
826 acc+=ta[(i+10 )*inc];
827 acc+=ta[(i+11 )*inc];
828 acc+=ta[(i+12 )*inc];
829 acc+=ta[(i+13 )*inc];
830 acc+=ta[(i+14 )*inc];
831 acc+=ta[(i+15 )*inc];
832 }
833 for( ;i<n;++i){ acc+=ta[(i+0 )*inc];
834 }
835 }
836 ;
837 tc[0] = acc;
838 }
839 else
840 #endif
841 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
842 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
843 {
844 register float complex acc = ((float complex)(0)); const float complex*ta = a; float complex*tc = c;
845 {
846 for(i=0;i+15<n;i+=16){
847 acc+=ta[(i+0 )*inc];
848 acc+=ta[(i+1 )*inc];
849 acc+=ta[(i+2 )*inc];
850 acc+=ta[(i+3 )*inc];
851 acc+=ta[(i+4 )*inc];
852 acc+=ta[(i+5 )*inc];
853 acc+=ta[(i+6 )*inc];
854 acc+=ta[(i+7 )*inc];
855 acc+=ta[(i+8 )*inc];
856 acc+=ta[(i+9 )*inc];
857 acc+=ta[(i+10 )*inc];
858 acc+=ta[(i+11 )*inc];
859 acc+=ta[(i+12 )*inc];
860 acc+=ta[(i+13 )*inc];
861 acc+=ta[(i+14 )*inc];
862 acc+=ta[(i+15 )*inc];
863 }
864 for( ;i<n;++i){ acc+=ta[(i+0 )*inc];
865 }
866 }
867 ;
868 tc[0] = acc;
869 }
870 else
871 #endif
872 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
873 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
874 {
875 register double complex acc = ((double complex)(0)); const double complex*ta = a; double complex*tc = c;
876 {
877 for(i=0;i+15<n;i+=16){
878 acc+=ta[(i+0 )*inc];
879 acc+=ta[(i+1 )*inc];
880 acc+=ta[(i+2 )*inc];
881 acc+=ta[(i+3 )*inc];
882 acc+=ta[(i+4 )*inc];
883 acc+=ta[(i+5 )*inc];
884 acc+=ta[(i+6 )*inc];
885 acc+=ta[(i+7 )*inc];
886 acc+=ta[(i+8 )*inc];
887 acc+=ta[(i+9 )*inc];
888 acc+=ta[(i+10 )*inc];
889 acc+=ta[(i+11 )*inc];
890 acc+=ta[(i+12 )*inc];
891 acc+=ta[(i+13 )*inc];
892 acc+=ta[(i+14 )*inc];
893 acc+=ta[(i+15 )*inc];
894 }
895 for( ;i<n;++i){ acc+=ta[(i+0 )*inc];
896 }
897 }
898 ;
899 tc[0] = acc;
900 }
901 else
902 #endif
903 return RSB_ERR_UNSUPPORTED_TYPE ;
904 return RSB_ERR_NO_ERROR;
905 }
906
rsb__util_vector_sum(void * c,const void * a,rsb_type_t type,size_t n)907 rsb_err_t rsb__util_vector_sum(void * c, const void * a, rsb_type_t type, size_t n)
908 {
909 /*!
910 * c <- sum(a)
911 *
912 * \param array an array pointer
913 * \param type a valid type code
914 * \param n the input array length
915 * \note see ddot in BLAS
916 *
917 * \return \rsberrcodemsg
918 * */
919 size_t i;
920 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
921 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
922 {
923 const double*ta = a; double*tc = c; tc[0] = ((double)(0));
924 {
925 for(i=0;i+15<n;i+=16){
926 tc[0]+=ta[i+0 ];
927 tc[0]+=ta[i+1 ];
928 tc[0]+=ta[i+2 ];
929 tc[0]+=ta[i+3 ];
930 tc[0]+=ta[i+4 ];
931 tc[0]+=ta[i+5 ];
932 tc[0]+=ta[i+6 ];
933 tc[0]+=ta[i+7 ];
934 tc[0]+=ta[i+8 ];
935 tc[0]+=ta[i+9 ];
936 tc[0]+=ta[i+10 ];
937 tc[0]+=ta[i+11 ];
938 tc[0]+=ta[i+12 ];
939 tc[0]+=ta[i+13 ];
940 tc[0]+=ta[i+14 ];
941 tc[0]+=ta[i+15 ];
942 }
943 for( ;i<n;++i){ tc[0]+=ta[i+0 ];
944 }
945 }
946 ;
947 }
948 else
949 #endif
950 #ifdef RSB_NUMERICAL_TYPE_FLOAT
951 if( type == RSB_NUMERICAL_TYPE_FLOAT )
952 {
953 const float*ta = a; float*tc = c; tc[0] = ((float)(0));
954 {
955 for(i=0;i+15<n;i+=16){
956 tc[0]+=ta[i+0 ];
957 tc[0]+=ta[i+1 ];
958 tc[0]+=ta[i+2 ];
959 tc[0]+=ta[i+3 ];
960 tc[0]+=ta[i+4 ];
961 tc[0]+=ta[i+5 ];
962 tc[0]+=ta[i+6 ];
963 tc[0]+=ta[i+7 ];
964 tc[0]+=ta[i+8 ];
965 tc[0]+=ta[i+9 ];
966 tc[0]+=ta[i+10 ];
967 tc[0]+=ta[i+11 ];
968 tc[0]+=ta[i+12 ];
969 tc[0]+=ta[i+13 ];
970 tc[0]+=ta[i+14 ];
971 tc[0]+=ta[i+15 ];
972 }
973 for( ;i<n;++i){ tc[0]+=ta[i+0 ];
974 }
975 }
976 ;
977 }
978 else
979 #endif
980 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
981 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
982 {
983 const float complex*ta = a; float complex*tc = c; tc[0] = ((float complex)(0));
984 {
985 for(i=0;i+15<n;i+=16){
986 tc[0]+=ta[i+0 ];
987 tc[0]+=ta[i+1 ];
988 tc[0]+=ta[i+2 ];
989 tc[0]+=ta[i+3 ];
990 tc[0]+=ta[i+4 ];
991 tc[0]+=ta[i+5 ];
992 tc[0]+=ta[i+6 ];
993 tc[0]+=ta[i+7 ];
994 tc[0]+=ta[i+8 ];
995 tc[0]+=ta[i+9 ];
996 tc[0]+=ta[i+10 ];
997 tc[0]+=ta[i+11 ];
998 tc[0]+=ta[i+12 ];
999 tc[0]+=ta[i+13 ];
1000 tc[0]+=ta[i+14 ];
1001 tc[0]+=ta[i+15 ];
1002 }
1003 for( ;i<n;++i){ tc[0]+=ta[i+0 ];
1004 }
1005 }
1006 ;
1007 }
1008 else
1009 #endif
1010 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1011 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
1012 {
1013 const double complex*ta = a; double complex*tc = c; tc[0] = ((double complex)(0));
1014 {
1015 for(i=0;i+15<n;i+=16){
1016 tc[0]+=ta[i+0 ];
1017 tc[0]+=ta[i+1 ];
1018 tc[0]+=ta[i+2 ];
1019 tc[0]+=ta[i+3 ];
1020 tc[0]+=ta[i+4 ];
1021 tc[0]+=ta[i+5 ];
1022 tc[0]+=ta[i+6 ];
1023 tc[0]+=ta[i+7 ];
1024 tc[0]+=ta[i+8 ];
1025 tc[0]+=ta[i+9 ];
1026 tc[0]+=ta[i+10 ];
1027 tc[0]+=ta[i+11 ];
1028 tc[0]+=ta[i+12 ];
1029 tc[0]+=ta[i+13 ];
1030 tc[0]+=ta[i+14 ];
1031 tc[0]+=ta[i+15 ];
1032 }
1033 for( ;i<n;++i){ tc[0]+=ta[i+0 ];
1034 }
1035 }
1036 ;
1037 }
1038 else
1039 #endif
1040 return RSB_ERR_UNSUPPORTED_TYPE ;
1041 return RSB_ERR_NO_ERROR;
1042 }
1043
rsb__vector_mult_sum(const void * a,const void * b,void * c,rsb_type_t type,size_t n,const int inca,const int incb)1044 static rsb_err_t rsb__vector_mult_sum(const void * a, const void * b, void * c, rsb_type_t type, size_t n, const int inca, const int incb)
1045 {
1046 /*!
1047 * c <- sum(a*b)
1048 * It is allowed to give c == a or c == b or a==b
1049 *
1050 * \param array an array pointer
1051 * \param type a valid type code
1052 * \param n the input array length
1053 * \note see ddot in BLAS
1054 *
1055 * \return \rsberrcodemsg
1056 *
1057 * p.s.: this routine is, numerically speaking, a crime!
1058 *
1059 * */
1060 size_t i;
1061 if(a==b && inca==incb)
1062 return rsb_vector_norm_square_strided(c,a,type,n,inca);
1063 if(inca == 1 && incb == 1)
1064 {
1065 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1066 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
1067 {
1068 const double*tb = b; const double*ta = a; double*tc = c,cacc = ((double)(0));
1069 {
1070 for(i=0;i+15<n;i+=16){
1071 cacc+=ta[i+0 ]*tb[i+0 ];
1072 cacc+=ta[i+1 ]*tb[i+1 ];
1073 cacc+=ta[i+2 ]*tb[i+2 ];
1074 cacc+=ta[i+3 ]*tb[i+3 ];
1075 cacc+=ta[i+4 ]*tb[i+4 ];
1076 cacc+=ta[i+5 ]*tb[i+5 ];
1077 cacc+=ta[i+6 ]*tb[i+6 ];
1078 cacc+=ta[i+7 ]*tb[i+7 ];
1079 cacc+=ta[i+8 ]*tb[i+8 ];
1080 cacc+=ta[i+9 ]*tb[i+9 ];
1081 cacc+=ta[i+10 ]*tb[i+10 ];
1082 cacc+=ta[i+11 ]*tb[i+11 ];
1083 cacc+=ta[i+12 ]*tb[i+12 ];
1084 cacc+=ta[i+13 ]*tb[i+13 ];
1085 cacc+=ta[i+14 ]*tb[i+14 ];
1086 cacc+=ta[i+15 ]*tb[i+15 ];
1087 }
1088 for( ;i<n;++i){ cacc+=ta[i+0 ]*tb[i+0 ];
1089 }
1090 }
1091 ;
1092 *tc = cacc;
1093 }
1094 else
1095 #endif
1096 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1097 if( type == RSB_NUMERICAL_TYPE_FLOAT )
1098 {
1099 const float*tb = b; const float*ta = a; float*tc = c,cacc = ((float)(0));
1100 {
1101 for(i=0;i+15<n;i+=16){
1102 cacc+=ta[i+0 ]*tb[i+0 ];
1103 cacc+=ta[i+1 ]*tb[i+1 ];
1104 cacc+=ta[i+2 ]*tb[i+2 ];
1105 cacc+=ta[i+3 ]*tb[i+3 ];
1106 cacc+=ta[i+4 ]*tb[i+4 ];
1107 cacc+=ta[i+5 ]*tb[i+5 ];
1108 cacc+=ta[i+6 ]*tb[i+6 ];
1109 cacc+=ta[i+7 ]*tb[i+7 ];
1110 cacc+=ta[i+8 ]*tb[i+8 ];
1111 cacc+=ta[i+9 ]*tb[i+9 ];
1112 cacc+=ta[i+10 ]*tb[i+10 ];
1113 cacc+=ta[i+11 ]*tb[i+11 ];
1114 cacc+=ta[i+12 ]*tb[i+12 ];
1115 cacc+=ta[i+13 ]*tb[i+13 ];
1116 cacc+=ta[i+14 ]*tb[i+14 ];
1117 cacc+=ta[i+15 ]*tb[i+15 ];
1118 }
1119 for( ;i<n;++i){ cacc+=ta[i+0 ]*tb[i+0 ];
1120 }
1121 }
1122 ;
1123 *tc = cacc;
1124 }
1125 else
1126 #endif
1127 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1128 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
1129 {
1130 const float complex*tb = b; const float complex*ta = a; float complex*tc = c,cacc = ((float complex)(0));
1131 {
1132 for(i=0;i+15<n;i+=16){
1133 cacc+=ta[i+0 ]*tb[i+0 ];
1134 cacc+=ta[i+1 ]*tb[i+1 ];
1135 cacc+=ta[i+2 ]*tb[i+2 ];
1136 cacc+=ta[i+3 ]*tb[i+3 ];
1137 cacc+=ta[i+4 ]*tb[i+4 ];
1138 cacc+=ta[i+5 ]*tb[i+5 ];
1139 cacc+=ta[i+6 ]*tb[i+6 ];
1140 cacc+=ta[i+7 ]*tb[i+7 ];
1141 cacc+=ta[i+8 ]*tb[i+8 ];
1142 cacc+=ta[i+9 ]*tb[i+9 ];
1143 cacc+=ta[i+10 ]*tb[i+10 ];
1144 cacc+=ta[i+11 ]*tb[i+11 ];
1145 cacc+=ta[i+12 ]*tb[i+12 ];
1146 cacc+=ta[i+13 ]*tb[i+13 ];
1147 cacc+=ta[i+14 ]*tb[i+14 ];
1148 cacc+=ta[i+15 ]*tb[i+15 ];
1149 }
1150 for( ;i<n;++i){ cacc+=ta[i+0 ]*tb[i+0 ];
1151 }
1152 }
1153 ;
1154 *tc = cacc;
1155 }
1156 else
1157 #endif
1158 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1159 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
1160 {
1161 const double complex*tb = b; const double complex*ta = a; double complex*tc = c,cacc = ((double complex)(0));
1162 {
1163 for(i=0;i+15<n;i+=16){
1164 cacc+=ta[i+0 ]*tb[i+0 ];
1165 cacc+=ta[i+1 ]*tb[i+1 ];
1166 cacc+=ta[i+2 ]*tb[i+2 ];
1167 cacc+=ta[i+3 ]*tb[i+3 ];
1168 cacc+=ta[i+4 ]*tb[i+4 ];
1169 cacc+=ta[i+5 ]*tb[i+5 ];
1170 cacc+=ta[i+6 ]*tb[i+6 ];
1171 cacc+=ta[i+7 ]*tb[i+7 ];
1172 cacc+=ta[i+8 ]*tb[i+8 ];
1173 cacc+=ta[i+9 ]*tb[i+9 ];
1174 cacc+=ta[i+10 ]*tb[i+10 ];
1175 cacc+=ta[i+11 ]*tb[i+11 ];
1176 cacc+=ta[i+12 ]*tb[i+12 ];
1177 cacc+=ta[i+13 ]*tb[i+13 ];
1178 cacc+=ta[i+14 ]*tb[i+14 ];
1179 cacc+=ta[i+15 ]*tb[i+15 ];
1180 }
1181 for( ;i<n;++i){ cacc+=ta[i+0 ]*tb[i+0 ];
1182 }
1183 }
1184 ;
1185 *tc = cacc;
1186 }
1187 else
1188 #endif
1189 return RSB_ERR_UNSUPPORTED_TYPE ;
1190 }
1191 else
1192 {
1193 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1194 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
1195 {
1196 const double*tb = b; const double*ta = a; double*tc = c,cacc = ((double)(0));
1197 {
1198 for(i=0;i+15<n;i+=16){
1199 cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1200 cacc+=ta[inca*(i+1 )]*tb[incb*(i+1 )];
1201 cacc+=ta[inca*(i+2 )]*tb[incb*(i+2 )];
1202 cacc+=ta[inca*(i+3 )]*tb[incb*(i+3 )];
1203 cacc+=ta[inca*(i+4 )]*tb[incb*(i+4 )];
1204 cacc+=ta[inca*(i+5 )]*tb[incb*(i+5 )];
1205 cacc+=ta[inca*(i+6 )]*tb[incb*(i+6 )];
1206 cacc+=ta[inca*(i+7 )]*tb[incb*(i+7 )];
1207 cacc+=ta[inca*(i+8 )]*tb[incb*(i+8 )];
1208 cacc+=ta[inca*(i+9 )]*tb[incb*(i+9 )];
1209 cacc+=ta[inca*(i+10 )]*tb[incb*(i+10 )];
1210 cacc+=ta[inca*(i+11 )]*tb[incb*(i+11 )];
1211 cacc+=ta[inca*(i+12 )]*tb[incb*(i+12 )];
1212 cacc+=ta[inca*(i+13 )]*tb[incb*(i+13 )];
1213 cacc+=ta[inca*(i+14 )]*tb[incb*(i+14 )];
1214 cacc+=ta[inca*(i+15 )]*tb[incb*(i+15 )];
1215 }
1216 for( ;i<n;++i){ cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1217 }
1218 }
1219 ;
1220 *tc = cacc;
1221 }
1222 else
1223 #endif
1224 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1225 if( type == RSB_NUMERICAL_TYPE_FLOAT )
1226 {
1227 const float*tb = b; const float*ta = a; float*tc = c,cacc = ((float)(0));
1228 {
1229 for(i=0;i+15<n;i+=16){
1230 cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1231 cacc+=ta[inca*(i+1 )]*tb[incb*(i+1 )];
1232 cacc+=ta[inca*(i+2 )]*tb[incb*(i+2 )];
1233 cacc+=ta[inca*(i+3 )]*tb[incb*(i+3 )];
1234 cacc+=ta[inca*(i+4 )]*tb[incb*(i+4 )];
1235 cacc+=ta[inca*(i+5 )]*tb[incb*(i+5 )];
1236 cacc+=ta[inca*(i+6 )]*tb[incb*(i+6 )];
1237 cacc+=ta[inca*(i+7 )]*tb[incb*(i+7 )];
1238 cacc+=ta[inca*(i+8 )]*tb[incb*(i+8 )];
1239 cacc+=ta[inca*(i+9 )]*tb[incb*(i+9 )];
1240 cacc+=ta[inca*(i+10 )]*tb[incb*(i+10 )];
1241 cacc+=ta[inca*(i+11 )]*tb[incb*(i+11 )];
1242 cacc+=ta[inca*(i+12 )]*tb[incb*(i+12 )];
1243 cacc+=ta[inca*(i+13 )]*tb[incb*(i+13 )];
1244 cacc+=ta[inca*(i+14 )]*tb[incb*(i+14 )];
1245 cacc+=ta[inca*(i+15 )]*tb[incb*(i+15 )];
1246 }
1247 for( ;i<n;++i){ cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1248 }
1249 }
1250 ;
1251 *tc = cacc;
1252 }
1253 else
1254 #endif
1255 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1256 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
1257 {
1258 const float complex*tb = b; const float complex*ta = a; float complex*tc = c,cacc = ((float complex)(0));
1259 {
1260 for(i=0;i+15<n;i+=16){
1261 cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1262 cacc+=ta[inca*(i+1 )]*tb[incb*(i+1 )];
1263 cacc+=ta[inca*(i+2 )]*tb[incb*(i+2 )];
1264 cacc+=ta[inca*(i+3 )]*tb[incb*(i+3 )];
1265 cacc+=ta[inca*(i+4 )]*tb[incb*(i+4 )];
1266 cacc+=ta[inca*(i+5 )]*tb[incb*(i+5 )];
1267 cacc+=ta[inca*(i+6 )]*tb[incb*(i+6 )];
1268 cacc+=ta[inca*(i+7 )]*tb[incb*(i+7 )];
1269 cacc+=ta[inca*(i+8 )]*tb[incb*(i+8 )];
1270 cacc+=ta[inca*(i+9 )]*tb[incb*(i+9 )];
1271 cacc+=ta[inca*(i+10 )]*tb[incb*(i+10 )];
1272 cacc+=ta[inca*(i+11 )]*tb[incb*(i+11 )];
1273 cacc+=ta[inca*(i+12 )]*tb[incb*(i+12 )];
1274 cacc+=ta[inca*(i+13 )]*tb[incb*(i+13 )];
1275 cacc+=ta[inca*(i+14 )]*tb[incb*(i+14 )];
1276 cacc+=ta[inca*(i+15 )]*tb[incb*(i+15 )];
1277 }
1278 for( ;i<n;++i){ cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1279 }
1280 }
1281 ;
1282 *tc = cacc;
1283 }
1284 else
1285 #endif
1286 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1287 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
1288 {
1289 const double complex*tb = b; const double complex*ta = a; double complex*tc = c,cacc = ((double complex)(0));
1290 {
1291 for(i=0;i+15<n;i+=16){
1292 cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1293 cacc+=ta[inca*(i+1 )]*tb[incb*(i+1 )];
1294 cacc+=ta[inca*(i+2 )]*tb[incb*(i+2 )];
1295 cacc+=ta[inca*(i+3 )]*tb[incb*(i+3 )];
1296 cacc+=ta[inca*(i+4 )]*tb[incb*(i+4 )];
1297 cacc+=ta[inca*(i+5 )]*tb[incb*(i+5 )];
1298 cacc+=ta[inca*(i+6 )]*tb[incb*(i+6 )];
1299 cacc+=ta[inca*(i+7 )]*tb[incb*(i+7 )];
1300 cacc+=ta[inca*(i+8 )]*tb[incb*(i+8 )];
1301 cacc+=ta[inca*(i+9 )]*tb[incb*(i+9 )];
1302 cacc+=ta[inca*(i+10 )]*tb[incb*(i+10 )];
1303 cacc+=ta[inca*(i+11 )]*tb[incb*(i+11 )];
1304 cacc+=ta[inca*(i+12 )]*tb[incb*(i+12 )];
1305 cacc+=ta[inca*(i+13 )]*tb[incb*(i+13 )];
1306 cacc+=ta[inca*(i+14 )]*tb[incb*(i+14 )];
1307 cacc+=ta[inca*(i+15 )]*tb[incb*(i+15 )];
1308 }
1309 for( ;i<n;++i){ cacc+=ta[inca*(i+0 )]*tb[incb*(i+0 )];
1310 }
1311 }
1312 ;
1313 *tc = cacc;
1314 }
1315 else
1316 #endif
1317 return RSB_ERR_UNSUPPORTED_TYPE ;
1318 }
1319 return RSB_ERR_NO_ERROR;
1320 }
1321
rsb_fill_with_zeros_nostride(void * array,rsb_type_t type,size_t n)1322 static rsb_err_t rsb_fill_with_zeros_nostride(void * array, rsb_type_t type, size_t n)
1323 {
1324 /*!
1325 * \ingroup gr_vec
1326 * Will zero the input n elements long array of type type.
1327 * \param array an array pointer
1328 * \param type a valid type code
1329 * \param n the input array length
1330 *
1331 * \return \rsberrcodemsg
1332 * */
1333 size_t i;
1334 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1335 if( type == RSB_NUMERICAL_TYPE_DOUBLE ){
1336 double*ta = array;
1337 {
1338 for(i=0;i+15<n;i+=16){
1339 ta[i+0 ] = ((double)(0));ta[i+1 ] = ((double)(0));ta[i+2 ] = ((double)(0));ta[i+3 ] = ((double)(0));ta[i+4 ] = ((double)(0));ta[i+5 ] = ((double)(0));ta[i+6 ] = ((double)(0));ta[i+7 ] = ((double)(0));ta[i+8 ] = ((double)(0));ta[i+9 ] = ((double)(0));ta[i+10 ] = ((double)(0));ta[i+11 ] = ((double)(0));ta[i+12 ] = ((double)(0));ta[i+13 ] = ((double)(0));ta[i+14 ] = ((double)(0));ta[i+15 ] = ((double)(0));}
1340 for( ;i<n;++i){ ta[i+0 ] = ((double)(0)); }
1341 }
1342 }
1343 else
1344 #endif
1345 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1346 if( type == RSB_NUMERICAL_TYPE_FLOAT ){
1347 float*ta = array;
1348 {
1349 for(i=0;i+15<n;i+=16){
1350 ta[i+0 ] = ((float)(0));ta[i+1 ] = ((float)(0));ta[i+2 ] = ((float)(0));ta[i+3 ] = ((float)(0));ta[i+4 ] = ((float)(0));ta[i+5 ] = ((float)(0));ta[i+6 ] = ((float)(0));ta[i+7 ] = ((float)(0));ta[i+8 ] = ((float)(0));ta[i+9 ] = ((float)(0));ta[i+10 ] = ((float)(0));ta[i+11 ] = ((float)(0));ta[i+12 ] = ((float)(0));ta[i+13 ] = ((float)(0));ta[i+14 ] = ((float)(0));ta[i+15 ] = ((float)(0));}
1351 for( ;i<n;++i){ ta[i+0 ] = ((float)(0)); }
1352 }
1353 }
1354 else
1355 #endif
1356 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1357 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ){
1358 float complex*ta = array;
1359 {
1360 for(i=0;i+15<n;i+=16){
1361 ta[i+0 ] = ((float complex)(0));ta[i+1 ] = ((float complex)(0));ta[i+2 ] = ((float complex)(0));ta[i+3 ] = ((float complex)(0));ta[i+4 ] = ((float complex)(0));ta[i+5 ] = ((float complex)(0));ta[i+6 ] = ((float complex)(0));ta[i+7 ] = ((float complex)(0));ta[i+8 ] = ((float complex)(0));ta[i+9 ] = ((float complex)(0));ta[i+10 ] = ((float complex)(0));ta[i+11 ] = ((float complex)(0));ta[i+12 ] = ((float complex)(0));ta[i+13 ] = ((float complex)(0));ta[i+14 ] = ((float complex)(0));ta[i+15 ] = ((float complex)(0));}
1362 for( ;i<n;++i){ ta[i+0 ] = ((float complex)(0)); }
1363 }
1364 }
1365 else
1366 #endif
1367 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1368 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ){
1369 double complex*ta = array;
1370 {
1371 for(i=0;i+15<n;i+=16){
1372 ta[i+0 ] = ((double complex)(0));ta[i+1 ] = ((double complex)(0));ta[i+2 ] = ((double complex)(0));ta[i+3 ] = ((double complex)(0));ta[i+4 ] = ((double complex)(0));ta[i+5 ] = ((double complex)(0));ta[i+6 ] = ((double complex)(0));ta[i+7 ] = ((double complex)(0));ta[i+8 ] = ((double complex)(0));ta[i+9 ] = ((double complex)(0));ta[i+10 ] = ((double complex)(0));ta[i+11 ] = ((double complex)(0));ta[i+12 ] = ((double complex)(0));ta[i+13 ] = ((double complex)(0));ta[i+14 ] = ((double complex)(0));ta[i+15 ] = ((double complex)(0));}
1373 for( ;i<n;++i){ ta[i+0 ] = ((double complex)(0)); }
1374 }
1375 }
1376 else
1377 #endif
1378 return RSB_ERR_UNSUPPORTED_TYPE ;
1379 return RSB_ERR_NO_ERROR;
1380 }
1381
rsb_fill_with_zeros(void * array,rsb_type_t type,size_t n,size_t incx)1382 static rsb_err_t rsb_fill_with_zeros(void * array, rsb_type_t type, size_t n, size_t incx)
1383 {
1384 /*!
1385 * \ingroup gr_vec
1386 * Will zero the input n elements long array of type type.
1387 * \param array an array pointer
1388 * \param type a valid type code
1389 * \param n the input array length
1390 *
1391 * \return \rsberrcodemsg
1392 * */
1393 size_t i;
1394 if(incx==1)
1395 return rsb_fill_with_zeros_nostride(array,type,n);
1396
1397 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1398 if( type == RSB_NUMERICAL_TYPE_DOUBLE ){
1399 double*ta = array;
1400 {
1401 for(i=0;i+15<n;i+=16){
1402 ta[(i+0 )*incx] = ((double)(0));ta[(i+1 )*incx] = ((double)(0));ta[(i+2 )*incx] = ((double)(0));ta[(i+3 )*incx] = ((double)(0));ta[(i+4 )*incx] = ((double)(0));ta[(i+5 )*incx] = ((double)(0));ta[(i+6 )*incx] = ((double)(0));ta[(i+7 )*incx] = ((double)(0));ta[(i+8 )*incx] = ((double)(0));ta[(i+9 )*incx] = ((double)(0));ta[(i+10 )*incx] = ((double)(0));ta[(i+11 )*incx] = ((double)(0));ta[(i+12 )*incx] = ((double)(0));ta[(i+13 )*incx] = ((double)(0));ta[(i+14 )*incx] = ((double)(0));ta[(i+15 )*incx] = ((double)(0));}
1403 for( ;i<n;++i){ ta[(i+0 )*incx] = ((double)(0)); }
1404 }
1405 }
1406 else
1407 #endif
1408 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1409 if( type == RSB_NUMERICAL_TYPE_FLOAT ){
1410 float*ta = array;
1411 {
1412 for(i=0;i+15<n;i+=16){
1413 ta[(i+0 )*incx] = ((float)(0));ta[(i+1 )*incx] = ((float)(0));ta[(i+2 )*incx] = ((float)(0));ta[(i+3 )*incx] = ((float)(0));ta[(i+4 )*incx] = ((float)(0));ta[(i+5 )*incx] = ((float)(0));ta[(i+6 )*incx] = ((float)(0));ta[(i+7 )*incx] = ((float)(0));ta[(i+8 )*incx] = ((float)(0));ta[(i+9 )*incx] = ((float)(0));ta[(i+10 )*incx] = ((float)(0));ta[(i+11 )*incx] = ((float)(0));ta[(i+12 )*incx] = ((float)(0));ta[(i+13 )*incx] = ((float)(0));ta[(i+14 )*incx] = ((float)(0));ta[(i+15 )*incx] = ((float)(0));}
1414 for( ;i<n;++i){ ta[(i+0 )*incx] = ((float)(0)); }
1415 }
1416 }
1417 else
1418 #endif
1419 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1420 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ){
1421 float complex*ta = array;
1422 {
1423 for(i=0;i+15<n;i+=16){
1424 ta[(i+0 )*incx] = ((float complex)(0));ta[(i+1 )*incx] = ((float complex)(0));ta[(i+2 )*incx] = ((float complex)(0));ta[(i+3 )*incx] = ((float complex)(0));ta[(i+4 )*incx] = ((float complex)(0));ta[(i+5 )*incx] = ((float complex)(0));ta[(i+6 )*incx] = ((float complex)(0));ta[(i+7 )*incx] = ((float complex)(0));ta[(i+8 )*incx] = ((float complex)(0));ta[(i+9 )*incx] = ((float complex)(0));ta[(i+10 )*incx] = ((float complex)(0));ta[(i+11 )*incx] = ((float complex)(0));ta[(i+12 )*incx] = ((float complex)(0));ta[(i+13 )*incx] = ((float complex)(0));ta[(i+14 )*incx] = ((float complex)(0));ta[(i+15 )*incx] = ((float complex)(0));}
1425 for( ;i<n;++i){ ta[(i+0 )*incx] = ((float complex)(0)); }
1426 }
1427 }
1428 else
1429 #endif
1430 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1431 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ){
1432 double complex*ta = array;
1433 {
1434 for(i=0;i+15<n;i+=16){
1435 ta[(i+0 )*incx] = ((double complex)(0));ta[(i+1 )*incx] = ((double complex)(0));ta[(i+2 )*incx] = ((double complex)(0));ta[(i+3 )*incx] = ((double complex)(0));ta[(i+4 )*incx] = ((double complex)(0));ta[(i+5 )*incx] = ((double complex)(0));ta[(i+6 )*incx] = ((double complex)(0));ta[(i+7 )*incx] = ((double complex)(0));ta[(i+8 )*incx] = ((double complex)(0));ta[(i+9 )*incx] = ((double complex)(0));ta[(i+10 )*incx] = ((double complex)(0));ta[(i+11 )*incx] = ((double complex)(0));ta[(i+12 )*incx] = ((double complex)(0));ta[(i+13 )*incx] = ((double complex)(0));ta[(i+14 )*incx] = ((double complex)(0));ta[(i+15 )*incx] = ((double complex)(0));}
1436 for( ;i<n;++i){ ta[(i+0 )*incx] = ((double complex)(0)); }
1437 }
1438 }
1439 else
1440 #endif
1441 return RSB_ERR_UNSUPPORTED_TYPE ;
1442 return RSB_ERR_NO_ERROR;
1443 }
1444
rsb_vector_scale(void * a,const void * alphap,rsb_type_t type,size_t n)1445 static rsb_err_t rsb_vector_scale(void * a, const void * alphap, rsb_type_t type, size_t n)
1446 {
1447 /*!
1448 * a <- a * alpha
1449 *
1450 * \param array an array pointer
1451 * \param type a valid type code
1452 * \param alphap scaling value (if NULL assumed to be zero)
1453 * \param n the input array length
1454 * \note see dscal in BLAS
1455 *
1456 * \return \rsberrcodemsg
1457 * */
1458 size_t i;
1459 if(alphap==NULL || RSB_IS_ELEMENT_ZERO(alphap,type))
1460 return rsb_fill_with_zeros(a,type,n,1);
1461
1462 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1463 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
1464 {
1465 const double alpha = *(double*)alphap; double*ta = a;
1466 {
1467 for(i=0;i+15<n;i+=16){
1468 ta[i+0 ]*=alpha;
1469 ta[i+1 ]*=alpha;
1470 ta[i+2 ]*=alpha;
1471 ta[i+3 ]*=alpha;
1472 ta[i+4 ]*=alpha;
1473 ta[i+5 ]*=alpha;
1474 ta[i+6 ]*=alpha;
1475 ta[i+7 ]*=alpha;
1476 ta[i+8 ]*=alpha;
1477 ta[i+9 ]*=alpha;
1478 ta[i+10 ]*=alpha;
1479 ta[i+11 ]*=alpha;
1480 ta[i+12 ]*=alpha;
1481 ta[i+13 ]*=alpha;
1482 ta[i+14 ]*=alpha;
1483 ta[i+15 ]*=alpha;
1484 }
1485 for( ;i<n;++i){ ta[i+0 ]*=alpha;
1486 }
1487 }
1488 ;
1489 }
1490 else
1491 #endif
1492 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1493 if( type == RSB_NUMERICAL_TYPE_FLOAT )
1494 {
1495 const float alpha = *(float*)alphap; float*ta = a;
1496 {
1497 for(i=0;i+15<n;i+=16){
1498 ta[i+0 ]*=alpha;
1499 ta[i+1 ]*=alpha;
1500 ta[i+2 ]*=alpha;
1501 ta[i+3 ]*=alpha;
1502 ta[i+4 ]*=alpha;
1503 ta[i+5 ]*=alpha;
1504 ta[i+6 ]*=alpha;
1505 ta[i+7 ]*=alpha;
1506 ta[i+8 ]*=alpha;
1507 ta[i+9 ]*=alpha;
1508 ta[i+10 ]*=alpha;
1509 ta[i+11 ]*=alpha;
1510 ta[i+12 ]*=alpha;
1511 ta[i+13 ]*=alpha;
1512 ta[i+14 ]*=alpha;
1513 ta[i+15 ]*=alpha;
1514 }
1515 for( ;i<n;++i){ ta[i+0 ]*=alpha;
1516 }
1517 }
1518 ;
1519 }
1520 else
1521 #endif
1522 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1523 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
1524 {
1525 const float complex alpha = *(float complex*)alphap; float complex*ta = a;
1526 {
1527 for(i=0;i+15<n;i+=16){
1528 ta[i+0 ]*=alpha;
1529 ta[i+1 ]*=alpha;
1530 ta[i+2 ]*=alpha;
1531 ta[i+3 ]*=alpha;
1532 ta[i+4 ]*=alpha;
1533 ta[i+5 ]*=alpha;
1534 ta[i+6 ]*=alpha;
1535 ta[i+7 ]*=alpha;
1536 ta[i+8 ]*=alpha;
1537 ta[i+9 ]*=alpha;
1538 ta[i+10 ]*=alpha;
1539 ta[i+11 ]*=alpha;
1540 ta[i+12 ]*=alpha;
1541 ta[i+13 ]*=alpha;
1542 ta[i+14 ]*=alpha;
1543 ta[i+15 ]*=alpha;
1544 }
1545 for( ;i<n;++i){ ta[i+0 ]*=alpha;
1546 }
1547 }
1548 ;
1549 }
1550 else
1551 #endif
1552 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1553 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
1554 {
1555 const double complex alpha = *(double complex*)alphap; double complex*ta = a;
1556 {
1557 for(i=0;i+15<n;i+=16){
1558 ta[i+0 ]*=alpha;
1559 ta[i+1 ]*=alpha;
1560 ta[i+2 ]*=alpha;
1561 ta[i+3 ]*=alpha;
1562 ta[i+4 ]*=alpha;
1563 ta[i+5 ]*=alpha;
1564 ta[i+6 ]*=alpha;
1565 ta[i+7 ]*=alpha;
1566 ta[i+8 ]*=alpha;
1567 ta[i+9 ]*=alpha;
1568 ta[i+10 ]*=alpha;
1569 ta[i+11 ]*=alpha;
1570 ta[i+12 ]*=alpha;
1571 ta[i+13 ]*=alpha;
1572 ta[i+14 ]*=alpha;
1573 ta[i+15 ]*=alpha;
1574 }
1575 for( ;i<n;++i){ ta[i+0 ]*=alpha;
1576 }
1577 }
1578 ;
1579 }
1580 else
1581 #endif
1582 return RSB_ERR_UNSUPPORTED_TYPE ;
1583 return RSB_ERR_NO_ERROR;
1584 }
1585
rsb_strided_vector_scale(void * a,const void * alphap,rsb_type_t type,size_t n,size_t stride)1586 static rsb_err_t rsb_strided_vector_scale(void * a, const void * alphap, rsb_type_t type, size_t n, size_t stride)
1587 {
1588 /*!
1589 * a <- a * alpha
1590 *
1591 * \param array an array pointer
1592 * \param type a valid type code
1593 * \param n the input array length
1594 * \note see dscal in BLAS
1595 *
1596 * \return \rsberrcodemsg
1597 * */
1598 size_t i;
1599 if(stride==1)
1600 return rsb_vector_scale(a,alphap,type,n);
1601 if(alphap==NULL || RSB_IS_ELEMENT_ZERO(alphap,type))
1602 return rsb_fill_with_zeros(a,type,n,stride);
1603
1604 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1605 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
1606 {
1607 const double alpha = *(double*)alphap; double*ta = a;
1608 {
1609 for(i=0;i+15<n;i+=16){
1610 ta[stride*(i+0 )]*=alpha;
1611 ta[stride*(i+1 )]*=alpha;
1612 ta[stride*(i+2 )]*=alpha;
1613 ta[stride*(i+3 )]*=alpha;
1614 ta[stride*(i+4 )]*=alpha;
1615 ta[stride*(i+5 )]*=alpha;
1616 ta[stride*(i+6 )]*=alpha;
1617 ta[stride*(i+7 )]*=alpha;
1618 ta[stride*(i+8 )]*=alpha;
1619 ta[stride*(i+9 )]*=alpha;
1620 ta[stride*(i+10 )]*=alpha;
1621 ta[stride*(i+11 )]*=alpha;
1622 ta[stride*(i+12 )]*=alpha;
1623 ta[stride*(i+13 )]*=alpha;
1624 ta[stride*(i+14 )]*=alpha;
1625 ta[stride*(i+15 )]*=alpha;
1626 }
1627 for( ;i<n;++i){ ta[stride*(i+0 )]*=alpha;
1628 }
1629 }
1630 ;
1631 }
1632 else
1633 #endif
1634 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1635 if( type == RSB_NUMERICAL_TYPE_FLOAT )
1636 {
1637 const float alpha = *(float*)alphap; float*ta = a;
1638 {
1639 for(i=0;i+15<n;i+=16){
1640 ta[stride*(i+0 )]*=alpha;
1641 ta[stride*(i+1 )]*=alpha;
1642 ta[stride*(i+2 )]*=alpha;
1643 ta[stride*(i+3 )]*=alpha;
1644 ta[stride*(i+4 )]*=alpha;
1645 ta[stride*(i+5 )]*=alpha;
1646 ta[stride*(i+6 )]*=alpha;
1647 ta[stride*(i+7 )]*=alpha;
1648 ta[stride*(i+8 )]*=alpha;
1649 ta[stride*(i+9 )]*=alpha;
1650 ta[stride*(i+10 )]*=alpha;
1651 ta[stride*(i+11 )]*=alpha;
1652 ta[stride*(i+12 )]*=alpha;
1653 ta[stride*(i+13 )]*=alpha;
1654 ta[stride*(i+14 )]*=alpha;
1655 ta[stride*(i+15 )]*=alpha;
1656 }
1657 for( ;i<n;++i){ ta[stride*(i+0 )]*=alpha;
1658 }
1659 }
1660 ;
1661 }
1662 else
1663 #endif
1664 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1665 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
1666 {
1667 const float complex alpha = *(float complex*)alphap; float complex*ta = a;
1668 {
1669 for(i=0;i+15<n;i+=16){
1670 ta[stride*(i+0 )]*=alpha;
1671 ta[stride*(i+1 )]*=alpha;
1672 ta[stride*(i+2 )]*=alpha;
1673 ta[stride*(i+3 )]*=alpha;
1674 ta[stride*(i+4 )]*=alpha;
1675 ta[stride*(i+5 )]*=alpha;
1676 ta[stride*(i+6 )]*=alpha;
1677 ta[stride*(i+7 )]*=alpha;
1678 ta[stride*(i+8 )]*=alpha;
1679 ta[stride*(i+9 )]*=alpha;
1680 ta[stride*(i+10 )]*=alpha;
1681 ta[stride*(i+11 )]*=alpha;
1682 ta[stride*(i+12 )]*=alpha;
1683 ta[stride*(i+13 )]*=alpha;
1684 ta[stride*(i+14 )]*=alpha;
1685 ta[stride*(i+15 )]*=alpha;
1686 }
1687 for( ;i<n;++i){ ta[stride*(i+0 )]*=alpha;
1688 }
1689 }
1690 ;
1691 }
1692 else
1693 #endif
1694 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1695 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
1696 {
1697 const double complex alpha = *(double complex*)alphap; double complex*ta = a;
1698 {
1699 for(i=0;i+15<n;i+=16){
1700 ta[stride*(i+0 )]*=alpha;
1701 ta[stride*(i+1 )]*=alpha;
1702 ta[stride*(i+2 )]*=alpha;
1703 ta[stride*(i+3 )]*=alpha;
1704 ta[stride*(i+4 )]*=alpha;
1705 ta[stride*(i+5 )]*=alpha;
1706 ta[stride*(i+6 )]*=alpha;
1707 ta[stride*(i+7 )]*=alpha;
1708 ta[stride*(i+8 )]*=alpha;
1709 ta[stride*(i+9 )]*=alpha;
1710 ta[stride*(i+10 )]*=alpha;
1711 ta[stride*(i+11 )]*=alpha;
1712 ta[stride*(i+12 )]*=alpha;
1713 ta[stride*(i+13 )]*=alpha;
1714 ta[stride*(i+14 )]*=alpha;
1715 ta[stride*(i+15 )]*=alpha;
1716 }
1717 for( ;i<n;++i){ ta[stride*(i+0 )]*=alpha;
1718 }
1719 }
1720 ;
1721 }
1722 else
1723 #endif
1724 return RSB_ERR_UNSUPPORTED_TYPE ;
1725 return RSB_ERR_NO_ERROR;
1726 }
1727
rsb__util_vector_add(void * a,const void * alphap,rsb_type_t type,size_t n)1728 rsb_err_t rsb__util_vector_add(void * a, const void * alphap, rsb_type_t type, size_t n)
1729 {
1730 /*!
1731 * a <- a + alpha
1732 *
1733 * \return \rsberrcodemsg
1734 * */
1735 size_t i;
1736
1737 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1738 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
1739 {
1740 const double alpha = *(double*)alphap; double*ta = a;
1741 {
1742 for(i=0;i+15<n;i+=16){
1743 ta[i+0 ]+=alpha;
1744 ta[i+1 ]+=alpha;
1745 ta[i+2 ]+=alpha;
1746 ta[i+3 ]+=alpha;
1747 ta[i+4 ]+=alpha;
1748 ta[i+5 ]+=alpha;
1749 ta[i+6 ]+=alpha;
1750 ta[i+7 ]+=alpha;
1751 ta[i+8 ]+=alpha;
1752 ta[i+9 ]+=alpha;
1753 ta[i+10 ]+=alpha;
1754 ta[i+11 ]+=alpha;
1755 ta[i+12 ]+=alpha;
1756 ta[i+13 ]+=alpha;
1757 ta[i+14 ]+=alpha;
1758 ta[i+15 ]+=alpha;
1759 }
1760 for( ;i<n;++i){ ta[i+0 ]+=alpha;
1761 }
1762 }
1763 ;
1764 }
1765 else
1766 #endif
1767 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1768 if( type == RSB_NUMERICAL_TYPE_FLOAT )
1769 {
1770 const float alpha = *(float*)alphap; float*ta = a;
1771 {
1772 for(i=0;i+15<n;i+=16){
1773 ta[i+0 ]+=alpha;
1774 ta[i+1 ]+=alpha;
1775 ta[i+2 ]+=alpha;
1776 ta[i+3 ]+=alpha;
1777 ta[i+4 ]+=alpha;
1778 ta[i+5 ]+=alpha;
1779 ta[i+6 ]+=alpha;
1780 ta[i+7 ]+=alpha;
1781 ta[i+8 ]+=alpha;
1782 ta[i+9 ]+=alpha;
1783 ta[i+10 ]+=alpha;
1784 ta[i+11 ]+=alpha;
1785 ta[i+12 ]+=alpha;
1786 ta[i+13 ]+=alpha;
1787 ta[i+14 ]+=alpha;
1788 ta[i+15 ]+=alpha;
1789 }
1790 for( ;i<n;++i){ ta[i+0 ]+=alpha;
1791 }
1792 }
1793 ;
1794 }
1795 else
1796 #endif
1797 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1798 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
1799 {
1800 const float complex alpha = *(float complex*)alphap; float complex*ta = a;
1801 {
1802 for(i=0;i+15<n;i+=16){
1803 ta[i+0 ]+=alpha;
1804 ta[i+1 ]+=alpha;
1805 ta[i+2 ]+=alpha;
1806 ta[i+3 ]+=alpha;
1807 ta[i+4 ]+=alpha;
1808 ta[i+5 ]+=alpha;
1809 ta[i+6 ]+=alpha;
1810 ta[i+7 ]+=alpha;
1811 ta[i+8 ]+=alpha;
1812 ta[i+9 ]+=alpha;
1813 ta[i+10 ]+=alpha;
1814 ta[i+11 ]+=alpha;
1815 ta[i+12 ]+=alpha;
1816 ta[i+13 ]+=alpha;
1817 ta[i+14 ]+=alpha;
1818 ta[i+15 ]+=alpha;
1819 }
1820 for( ;i<n;++i){ ta[i+0 ]+=alpha;
1821 }
1822 }
1823 ;
1824 }
1825 else
1826 #endif
1827 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1828 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
1829 {
1830 const double complex alpha = *(double complex*)alphap; double complex*ta = a;
1831 {
1832 for(i=0;i+15<n;i+=16){
1833 ta[i+0 ]+=alpha;
1834 ta[i+1 ]+=alpha;
1835 ta[i+2 ]+=alpha;
1836 ta[i+3 ]+=alpha;
1837 ta[i+4 ]+=alpha;
1838 ta[i+5 ]+=alpha;
1839 ta[i+6 ]+=alpha;
1840 ta[i+7 ]+=alpha;
1841 ta[i+8 ]+=alpha;
1842 ta[i+9 ]+=alpha;
1843 ta[i+10 ]+=alpha;
1844 ta[i+11 ]+=alpha;
1845 ta[i+12 ]+=alpha;
1846 ta[i+13 ]+=alpha;
1847 ta[i+14 ]+=alpha;
1848 ta[i+15 ]+=alpha;
1849 }
1850 for( ;i<n;++i){ ta[i+0 ]+=alpha;
1851 }
1852 }
1853 ;
1854 }
1855 else
1856 #endif
1857 return RSB_ERR_UNSUPPORTED_TYPE ;
1858 return RSB_ERR_NO_ERROR;
1859 }
1860
rsb__util_vector_div(void * a,const void * alphap,rsb_type_t type,size_t n)1861 rsb_err_t rsb__util_vector_div(void * a, const void * alphap, rsb_type_t type, size_t n)
1862 {
1863 /*!
1864 * this is a benchmark-oriented function only..
1865 * \return \rsberrcodemsg
1866 * */
1867 size_t i;
1868
1869 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
1870 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
1871 {
1872 const double alpha = *(double*)alphap; double*ta = a;
1873 {
1874 for(i=0;i+15<n;i+=16){
1875 ta[i+0 ]/=alpha;
1876 ta[i+1 ]/=alpha;
1877 ta[i+2 ]/=alpha;
1878 ta[i+3 ]/=alpha;
1879 ta[i+4 ]/=alpha;
1880 ta[i+5 ]/=alpha;
1881 ta[i+6 ]/=alpha;
1882 ta[i+7 ]/=alpha;
1883 ta[i+8 ]/=alpha;
1884 ta[i+9 ]/=alpha;
1885 ta[i+10 ]/=alpha;
1886 ta[i+11 ]/=alpha;
1887 ta[i+12 ]/=alpha;
1888 ta[i+13 ]/=alpha;
1889 ta[i+14 ]/=alpha;
1890 ta[i+15 ]/=alpha;
1891 }
1892 for( ;i<n;++i){ ta[i+0 ]/=alpha;
1893 }
1894 }
1895 ;
1896 }
1897 else
1898 #endif
1899 #ifdef RSB_NUMERICAL_TYPE_FLOAT
1900 if( type == RSB_NUMERICAL_TYPE_FLOAT )
1901 {
1902 const float alpha = *(float*)alphap; float*ta = a;
1903 {
1904 for(i=0;i+15<n;i+=16){
1905 ta[i+0 ]/=alpha;
1906 ta[i+1 ]/=alpha;
1907 ta[i+2 ]/=alpha;
1908 ta[i+3 ]/=alpha;
1909 ta[i+4 ]/=alpha;
1910 ta[i+5 ]/=alpha;
1911 ta[i+6 ]/=alpha;
1912 ta[i+7 ]/=alpha;
1913 ta[i+8 ]/=alpha;
1914 ta[i+9 ]/=alpha;
1915 ta[i+10 ]/=alpha;
1916 ta[i+11 ]/=alpha;
1917 ta[i+12 ]/=alpha;
1918 ta[i+13 ]/=alpha;
1919 ta[i+14 ]/=alpha;
1920 ta[i+15 ]/=alpha;
1921 }
1922 for( ;i<n;++i){ ta[i+0 ]/=alpha;
1923 }
1924 }
1925 ;
1926 }
1927 else
1928 #endif
1929 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
1930 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
1931 {
1932 const float complex alpha = *(float complex*)alphap; float complex*ta = a;
1933 {
1934 for(i=0;i+15<n;i+=16){
1935 ta[i+0 ]/=alpha;
1936 ta[i+1 ]/=alpha;
1937 ta[i+2 ]/=alpha;
1938 ta[i+3 ]/=alpha;
1939 ta[i+4 ]/=alpha;
1940 ta[i+5 ]/=alpha;
1941 ta[i+6 ]/=alpha;
1942 ta[i+7 ]/=alpha;
1943 ta[i+8 ]/=alpha;
1944 ta[i+9 ]/=alpha;
1945 ta[i+10 ]/=alpha;
1946 ta[i+11 ]/=alpha;
1947 ta[i+12 ]/=alpha;
1948 ta[i+13 ]/=alpha;
1949 ta[i+14 ]/=alpha;
1950 ta[i+15 ]/=alpha;
1951 }
1952 for( ;i<n;++i){ ta[i+0 ]/=alpha;
1953 }
1954 }
1955 ;
1956 }
1957 else
1958 #endif
1959 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
1960 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
1961 {
1962 const double complex alpha = *(double complex*)alphap; double complex*ta = a;
1963 {
1964 for(i=0;i+15<n;i+=16){
1965 ta[i+0 ]/=alpha;
1966 ta[i+1 ]/=alpha;
1967 ta[i+2 ]/=alpha;
1968 ta[i+3 ]/=alpha;
1969 ta[i+4 ]/=alpha;
1970 ta[i+5 ]/=alpha;
1971 ta[i+6 ]/=alpha;
1972 ta[i+7 ]/=alpha;
1973 ta[i+8 ]/=alpha;
1974 ta[i+9 ]/=alpha;
1975 ta[i+10 ]/=alpha;
1976 ta[i+11 ]/=alpha;
1977 ta[i+12 ]/=alpha;
1978 ta[i+13 ]/=alpha;
1979 ta[i+14 ]/=alpha;
1980 ta[i+15 ]/=alpha;
1981 }
1982 for( ;i<n;++i){ ta[i+0 ]/=alpha;
1983 }
1984 }
1985 ;
1986 }
1987 else
1988 #endif
1989 return RSB_ERR_UNSUPPORTED_TYPE ;
1990 return RSB_ERR_NO_ERROR;
1991 }
1992
rsb__vector_increase_by_one(void * a,rsb_type_t type,size_t n)1993 rsb_err_t rsb__vector_increase_by_one(void * a, rsb_type_t type, size_t n)
1994 {
1995 /*!
1996 * \return \rsberrcodemsg
1997 * */
1998 size_t i;
1999
2000 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2001 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
2002 { double*ta = a;
2003 {
2004 for(i=0;i+15<n;i+=16){
2005 ta[i+0 ]+=((double)(1.0));
2006 ta[i+1 ]+=((double)(1.0));
2007 ta[i+2 ]+=((double)(1.0));
2008 ta[i+3 ]+=((double)(1.0));
2009 ta[i+4 ]+=((double)(1.0));
2010 ta[i+5 ]+=((double)(1.0));
2011 ta[i+6 ]+=((double)(1.0));
2012 ta[i+7 ]+=((double)(1.0));
2013 ta[i+8 ]+=((double)(1.0));
2014 ta[i+9 ]+=((double)(1.0));
2015 ta[i+10 ]+=((double)(1.0));
2016 ta[i+11 ]+=((double)(1.0));
2017 ta[i+12 ]+=((double)(1.0));
2018 ta[i+13 ]+=((double)(1.0));
2019 ta[i+14 ]+=((double)(1.0));
2020 ta[i+15 ]+=((double)(1.0));
2021 }
2022 for( ;i<n;++i){ ta[i+0 ]+=((double)(1.0));
2023 }
2024 }
2025 ;
2026 }
2027 else
2028 #endif
2029 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2030 if( type == RSB_NUMERICAL_TYPE_FLOAT )
2031 { float*ta = a;
2032 {
2033 for(i=0;i+15<n;i+=16){
2034 ta[i+0 ]+=((float)(1.0));
2035 ta[i+1 ]+=((float)(1.0));
2036 ta[i+2 ]+=((float)(1.0));
2037 ta[i+3 ]+=((float)(1.0));
2038 ta[i+4 ]+=((float)(1.0));
2039 ta[i+5 ]+=((float)(1.0));
2040 ta[i+6 ]+=((float)(1.0));
2041 ta[i+7 ]+=((float)(1.0));
2042 ta[i+8 ]+=((float)(1.0));
2043 ta[i+9 ]+=((float)(1.0));
2044 ta[i+10 ]+=((float)(1.0));
2045 ta[i+11 ]+=((float)(1.0));
2046 ta[i+12 ]+=((float)(1.0));
2047 ta[i+13 ]+=((float)(1.0));
2048 ta[i+14 ]+=((float)(1.0));
2049 ta[i+15 ]+=((float)(1.0));
2050 }
2051 for( ;i<n;++i){ ta[i+0 ]+=((float)(1.0));
2052 }
2053 }
2054 ;
2055 }
2056 else
2057 #endif
2058 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2059 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
2060 { float complex*ta = a;
2061 {
2062 for(i=0;i+15<n;i+=16){
2063 ta[i+0 ]+=((float complex)(1.0));
2064 ta[i+1 ]+=((float complex)(1.0));
2065 ta[i+2 ]+=((float complex)(1.0));
2066 ta[i+3 ]+=((float complex)(1.0));
2067 ta[i+4 ]+=((float complex)(1.0));
2068 ta[i+5 ]+=((float complex)(1.0));
2069 ta[i+6 ]+=((float complex)(1.0));
2070 ta[i+7 ]+=((float complex)(1.0));
2071 ta[i+8 ]+=((float complex)(1.0));
2072 ta[i+9 ]+=((float complex)(1.0));
2073 ta[i+10 ]+=((float complex)(1.0));
2074 ta[i+11 ]+=((float complex)(1.0));
2075 ta[i+12 ]+=((float complex)(1.0));
2076 ta[i+13 ]+=((float complex)(1.0));
2077 ta[i+14 ]+=((float complex)(1.0));
2078 ta[i+15 ]+=((float complex)(1.0));
2079 }
2080 for( ;i<n;++i){ ta[i+0 ]+=((float complex)(1.0));
2081 }
2082 }
2083 ;
2084 }
2085 else
2086 #endif
2087 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2088 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
2089 { double complex*ta = a;
2090 {
2091 for(i=0;i+15<n;i+=16){
2092 ta[i+0 ]+=((double complex)(1.0));
2093 ta[i+1 ]+=((double complex)(1.0));
2094 ta[i+2 ]+=((double complex)(1.0));
2095 ta[i+3 ]+=((double complex)(1.0));
2096 ta[i+4 ]+=((double complex)(1.0));
2097 ta[i+5 ]+=((double complex)(1.0));
2098 ta[i+6 ]+=((double complex)(1.0));
2099 ta[i+7 ]+=((double complex)(1.0));
2100 ta[i+8 ]+=((double complex)(1.0));
2101 ta[i+9 ]+=((double complex)(1.0));
2102 ta[i+10 ]+=((double complex)(1.0));
2103 ta[i+11 ]+=((double complex)(1.0));
2104 ta[i+12 ]+=((double complex)(1.0));
2105 ta[i+13 ]+=((double complex)(1.0));
2106 ta[i+14 ]+=((double complex)(1.0));
2107 ta[i+15 ]+=((double complex)(1.0));
2108 }
2109 for( ;i<n;++i){ ta[i+0 ]+=((double complex)(1.0));
2110 }
2111 }
2112 ;
2113 }
2114 else
2115 #endif
2116 return RSB_ERR_UNSUPPORTED_TYPE ;
2117 return RSB_ERR_NO_ERROR;
2118 }
2119
rsb__util_vector_pow(void * a,rsb_type_t type,const void * y,size_t n)2120 rsb_err_t rsb__util_vector_pow(void * a, rsb_type_t type, const void *y, size_t n)
2121 {
2122 /*!
2123 * \return \rsberrcodemsg
2124 * */
2125 size_t i;
2126 if(!a || !y)
2127 goto err;
2128 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2129 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
2130 {
2131 double ty = *(double*)y,*ta = a;
2132 {
2133 for(i=0;i+15<n;i+=16){
2134 ta[i+0 ] = pow(ta[i+0 ],ty);
2135 ta[i+1 ] = pow(ta[i+1 ],ty);
2136 ta[i+2 ] = pow(ta[i+2 ],ty);
2137 ta[i+3 ] = pow(ta[i+3 ],ty);
2138 ta[i+4 ] = pow(ta[i+4 ],ty);
2139 ta[i+5 ] = pow(ta[i+5 ],ty);
2140 ta[i+6 ] = pow(ta[i+6 ],ty);
2141 ta[i+7 ] = pow(ta[i+7 ],ty);
2142 ta[i+8 ] = pow(ta[i+8 ],ty);
2143 ta[i+9 ] = pow(ta[i+9 ],ty);
2144 ta[i+10 ] = pow(ta[i+10 ],ty);
2145 ta[i+11 ] = pow(ta[i+11 ],ty);
2146 ta[i+12 ] = pow(ta[i+12 ],ty);
2147 ta[i+13 ] = pow(ta[i+13 ],ty);
2148 ta[i+14 ] = pow(ta[i+14 ],ty);
2149 ta[i+15 ] = pow(ta[i+15 ],ty);
2150 }
2151 for( ;i<n;++i){ ta[i+0 ] = pow(ta[i+0 ],ty);
2152 }
2153 }
2154 ;
2155 }
2156 else
2157 #endif
2158 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2159 if( type == RSB_NUMERICAL_TYPE_FLOAT )
2160 {
2161 float ty = *(float*)y,*ta = a;
2162 {
2163 for(i=0;i+15<n;i+=16){
2164 ta[i+0 ] = powf(ta[i+0 ],ty);
2165 ta[i+1 ] = powf(ta[i+1 ],ty);
2166 ta[i+2 ] = powf(ta[i+2 ],ty);
2167 ta[i+3 ] = powf(ta[i+3 ],ty);
2168 ta[i+4 ] = powf(ta[i+4 ],ty);
2169 ta[i+5 ] = powf(ta[i+5 ],ty);
2170 ta[i+6 ] = powf(ta[i+6 ],ty);
2171 ta[i+7 ] = powf(ta[i+7 ],ty);
2172 ta[i+8 ] = powf(ta[i+8 ],ty);
2173 ta[i+9 ] = powf(ta[i+9 ],ty);
2174 ta[i+10 ] = powf(ta[i+10 ],ty);
2175 ta[i+11 ] = powf(ta[i+11 ],ty);
2176 ta[i+12 ] = powf(ta[i+12 ],ty);
2177 ta[i+13 ] = powf(ta[i+13 ],ty);
2178 ta[i+14 ] = powf(ta[i+14 ],ty);
2179 ta[i+15 ] = powf(ta[i+15 ],ty);
2180 }
2181 for( ;i<n;++i){ ta[i+0 ] = powf(ta[i+0 ],ty);
2182 }
2183 }
2184 ;
2185 }
2186 else
2187 #endif
2188 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2189 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
2190 {
2191 float complex ty = *(float complex*)y,*ta = a;
2192 {
2193 for(i=0;i+15<n;i+=16){
2194 ta[i+0 ] = cpowf(ta[i+0 ],ty);
2195 ta[i+1 ] = cpowf(ta[i+1 ],ty);
2196 ta[i+2 ] = cpowf(ta[i+2 ],ty);
2197 ta[i+3 ] = cpowf(ta[i+3 ],ty);
2198 ta[i+4 ] = cpowf(ta[i+4 ],ty);
2199 ta[i+5 ] = cpowf(ta[i+5 ],ty);
2200 ta[i+6 ] = cpowf(ta[i+6 ],ty);
2201 ta[i+7 ] = cpowf(ta[i+7 ],ty);
2202 ta[i+8 ] = cpowf(ta[i+8 ],ty);
2203 ta[i+9 ] = cpowf(ta[i+9 ],ty);
2204 ta[i+10 ] = cpowf(ta[i+10 ],ty);
2205 ta[i+11 ] = cpowf(ta[i+11 ],ty);
2206 ta[i+12 ] = cpowf(ta[i+12 ],ty);
2207 ta[i+13 ] = cpowf(ta[i+13 ],ty);
2208 ta[i+14 ] = cpowf(ta[i+14 ],ty);
2209 ta[i+15 ] = cpowf(ta[i+15 ],ty);
2210 }
2211 for( ;i<n;++i){ ta[i+0 ] = cpowf(ta[i+0 ],ty);
2212 }
2213 }
2214 ;
2215 }
2216 else
2217 #endif
2218 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2219 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
2220 {
2221 double complex ty = *(double complex*)y,*ta = a;
2222 {
2223 for(i=0;i+15<n;i+=16){
2224 ta[i+0 ] = cpow(ta[i+0 ],ty);
2225 ta[i+1 ] = cpow(ta[i+1 ],ty);
2226 ta[i+2 ] = cpow(ta[i+2 ],ty);
2227 ta[i+3 ] = cpow(ta[i+3 ],ty);
2228 ta[i+4 ] = cpow(ta[i+4 ],ty);
2229 ta[i+5 ] = cpow(ta[i+5 ],ty);
2230 ta[i+6 ] = cpow(ta[i+6 ],ty);
2231 ta[i+7 ] = cpow(ta[i+7 ],ty);
2232 ta[i+8 ] = cpow(ta[i+8 ],ty);
2233 ta[i+9 ] = cpow(ta[i+9 ],ty);
2234 ta[i+10 ] = cpow(ta[i+10 ],ty);
2235 ta[i+11 ] = cpow(ta[i+11 ],ty);
2236 ta[i+12 ] = cpow(ta[i+12 ],ty);
2237 ta[i+13 ] = cpow(ta[i+13 ],ty);
2238 ta[i+14 ] = cpow(ta[i+14 ],ty);
2239 ta[i+15 ] = cpow(ta[i+15 ],ty);
2240 }
2241 for( ;i<n;++i){ ta[i+0 ] = cpow(ta[i+0 ],ty);
2242 }
2243 }
2244 ;
2245 }
2246 else
2247 #endif
2248 return RSB_ERR_UNSUPPORTED_TYPE ;
2249 err:
2250 return RSB_ERR_NO_ERROR;
2251 }
2252
rsb__util_vector_sqrt(void * a,rsb_type_t type,size_t n)2253 rsb_err_t rsb__util_vector_sqrt(void * a, rsb_type_t type, size_t n)
2254 {
2255 /*!
2256 * \return \rsberrcodemsg
2257 * */
2258 size_t i;
2259 if(!a)goto err;
2260 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2261 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
2262 {double*ta = a;
2263 {
2264 for(i=0;i+15<n;i+=16){
2265 ta[i+0 ] = sqrt(ta[i+0 ]);
2266 ta[i+1 ] = sqrt(ta[i+1 ]);
2267 ta[i+2 ] = sqrt(ta[i+2 ]);
2268 ta[i+3 ] = sqrt(ta[i+3 ]);
2269 ta[i+4 ] = sqrt(ta[i+4 ]);
2270 ta[i+5 ] = sqrt(ta[i+5 ]);
2271 ta[i+6 ] = sqrt(ta[i+6 ]);
2272 ta[i+7 ] = sqrt(ta[i+7 ]);
2273 ta[i+8 ] = sqrt(ta[i+8 ]);
2274 ta[i+9 ] = sqrt(ta[i+9 ]);
2275 ta[i+10 ] = sqrt(ta[i+10 ]);
2276 ta[i+11 ] = sqrt(ta[i+11 ]);
2277 ta[i+12 ] = sqrt(ta[i+12 ]);
2278 ta[i+13 ] = sqrt(ta[i+13 ]);
2279 ta[i+14 ] = sqrt(ta[i+14 ]);
2280 ta[i+15 ] = sqrt(ta[i+15 ]);
2281 }
2282 for( ;i<n;++i){ ta[i+0 ] = sqrt(ta[i+0 ]);
2283 }
2284 }
2285 ;
2286 }
2287 else
2288 #endif
2289 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2290 if( type == RSB_NUMERICAL_TYPE_FLOAT )
2291 {float*ta = a;
2292 {
2293 for(i=0;i+15<n;i+=16){
2294 ta[i+0 ] = sqrtf(ta[i+0 ]);
2295 ta[i+1 ] = sqrtf(ta[i+1 ]);
2296 ta[i+2 ] = sqrtf(ta[i+2 ]);
2297 ta[i+3 ] = sqrtf(ta[i+3 ]);
2298 ta[i+4 ] = sqrtf(ta[i+4 ]);
2299 ta[i+5 ] = sqrtf(ta[i+5 ]);
2300 ta[i+6 ] = sqrtf(ta[i+6 ]);
2301 ta[i+7 ] = sqrtf(ta[i+7 ]);
2302 ta[i+8 ] = sqrtf(ta[i+8 ]);
2303 ta[i+9 ] = sqrtf(ta[i+9 ]);
2304 ta[i+10 ] = sqrtf(ta[i+10 ]);
2305 ta[i+11 ] = sqrtf(ta[i+11 ]);
2306 ta[i+12 ] = sqrtf(ta[i+12 ]);
2307 ta[i+13 ] = sqrtf(ta[i+13 ]);
2308 ta[i+14 ] = sqrtf(ta[i+14 ]);
2309 ta[i+15 ] = sqrtf(ta[i+15 ]);
2310 }
2311 for( ;i<n;++i){ ta[i+0 ] = sqrtf(ta[i+0 ]);
2312 }
2313 }
2314 ;
2315 }
2316 else
2317 #endif
2318 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2319 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
2320 {float complex*ta = a;
2321 {
2322 for(i=0;i+15<n;i+=16){
2323 ta[i+0 ] = csqrtf(ta[i+0 ]);
2324 ta[i+1 ] = csqrtf(ta[i+1 ]);
2325 ta[i+2 ] = csqrtf(ta[i+2 ]);
2326 ta[i+3 ] = csqrtf(ta[i+3 ]);
2327 ta[i+4 ] = csqrtf(ta[i+4 ]);
2328 ta[i+5 ] = csqrtf(ta[i+5 ]);
2329 ta[i+6 ] = csqrtf(ta[i+6 ]);
2330 ta[i+7 ] = csqrtf(ta[i+7 ]);
2331 ta[i+8 ] = csqrtf(ta[i+8 ]);
2332 ta[i+9 ] = csqrtf(ta[i+9 ]);
2333 ta[i+10 ] = csqrtf(ta[i+10 ]);
2334 ta[i+11 ] = csqrtf(ta[i+11 ]);
2335 ta[i+12 ] = csqrtf(ta[i+12 ]);
2336 ta[i+13 ] = csqrtf(ta[i+13 ]);
2337 ta[i+14 ] = csqrtf(ta[i+14 ]);
2338 ta[i+15 ] = csqrtf(ta[i+15 ]);
2339 }
2340 for( ;i<n;++i){ ta[i+0 ] = csqrtf(ta[i+0 ]);
2341 }
2342 }
2343 ;
2344 }
2345 else
2346 #endif
2347 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2348 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
2349 {double complex*ta = a;
2350 {
2351 for(i=0;i+15<n;i+=16){
2352 ta[i+0 ] = csqrt(ta[i+0 ]);
2353 ta[i+1 ] = csqrt(ta[i+1 ]);
2354 ta[i+2 ] = csqrt(ta[i+2 ]);
2355 ta[i+3 ] = csqrt(ta[i+3 ]);
2356 ta[i+4 ] = csqrt(ta[i+4 ]);
2357 ta[i+5 ] = csqrt(ta[i+5 ]);
2358 ta[i+6 ] = csqrt(ta[i+6 ]);
2359 ta[i+7 ] = csqrt(ta[i+7 ]);
2360 ta[i+8 ] = csqrt(ta[i+8 ]);
2361 ta[i+9 ] = csqrt(ta[i+9 ]);
2362 ta[i+10 ] = csqrt(ta[i+10 ]);
2363 ta[i+11 ] = csqrt(ta[i+11 ]);
2364 ta[i+12 ] = csqrt(ta[i+12 ]);
2365 ta[i+13 ] = csqrt(ta[i+13 ]);
2366 ta[i+14 ] = csqrt(ta[i+14 ]);
2367 ta[i+15 ] = csqrt(ta[i+15 ]);
2368 }
2369 for( ;i<n;++i){ ta[i+0 ] = csqrt(ta[i+0 ]);
2370 }
2371 }
2372 ;
2373 }
2374 else
2375 #endif
2376 return RSB_ERR_UNSUPPORTED_TYPE ;
2377 err:
2378 return RSB_ERR_NO_ERROR;
2379 }
2380
rsb__vector_scale_inv(void * a,const void * alphap,rsb_type_t type,size_t n)2381 rsb_err_t rsb__vector_scale_inv(void * a, const void * alphap, rsb_type_t type, size_t n)
2382 {
2383 /*!
2384 * a <- 1/a * alpha
2385 *
2386 * \param array an array pointer
2387 * \param type a valid type code
2388 * \param n the input array length
2389 * \note see dscal in BLAS
2390 *
2391 * \return \rsberrcodemsg
2392 * */
2393 if(!alphap)
2394 return RSB_ERR_BADARGS;
2395
2396 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2397 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
2398 {
2399 double alphai = ((double)(1.0))/(*(double*)alphap);
2400 return rsb_vector_scale(a,&alphai,type,n);
2401 }
2402 else
2403 #endif
2404 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2405 if( type == RSB_NUMERICAL_TYPE_FLOAT )
2406 {
2407 float alphai = ((float)(1.0))/(*(float*)alphap);
2408 return rsb_vector_scale(a,&alphai,type,n);
2409 }
2410 else
2411 #endif
2412 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2413 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
2414 {
2415 float complex alphai = ((float complex)(1.0))/(*(float complex*)alphap);
2416 return rsb_vector_scale(a,&alphai,type,n);
2417 }
2418 else
2419 #endif
2420 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2421 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
2422 {
2423 double complex alphai = ((double complex)(1.0))/(*(double complex*)alphap);
2424 return rsb_vector_scale(a,&alphai,type,n);
2425 }
2426 else
2427 #endif
2428 return RSB_ERR_UNSUPPORTED_TYPE ;
2429 return RSB_ERR_NO_ERROR;
2430 }
2431
rsb__vector_sum_of_abs_diffs(void * c,const void * a,const void * b,rsb_type_t type,size_t n)2432 rsb_err_t rsb__vector_sum_of_abs_diffs(void * c, const void * a, const void * b, rsb_type_t type, size_t n)
2433 {
2434 /*!
2435 * \return \rsberrcodemsg
2436 * */
2437 size_t i;
2438 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2439 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
2440 {
2441 const double*ap = a,*bp = b;
2442 double ac = ((double)(0));
2443 {
2444 for(i=0;i+15<n;i+=16){
2445 ac += fabs(ap[i+0 ]-bp[i+0 ]);
2446 ac += fabs(ap[i+1 ]-bp[i+1 ]);
2447 ac += fabs(ap[i+2 ]-bp[i+2 ]);
2448 ac += fabs(ap[i+3 ]-bp[i+3 ]);
2449 ac += fabs(ap[i+4 ]-bp[i+4 ]);
2450 ac += fabs(ap[i+5 ]-bp[i+5 ]);
2451 ac += fabs(ap[i+6 ]-bp[i+6 ]);
2452 ac += fabs(ap[i+7 ]-bp[i+7 ]);
2453 ac += fabs(ap[i+8 ]-bp[i+8 ]);
2454 ac += fabs(ap[i+9 ]-bp[i+9 ]);
2455 ac += fabs(ap[i+10 ]-bp[i+10 ]);
2456 ac += fabs(ap[i+11 ]-bp[i+11 ]);
2457 ac += fabs(ap[i+12 ]-bp[i+12 ]);
2458 ac += fabs(ap[i+13 ]-bp[i+13 ]);
2459 ac += fabs(ap[i+14 ]-bp[i+14 ]);
2460 ac += fabs(ap[i+15 ]-bp[i+15 ]);
2461 }
2462 for( ;i<n;++i){ ac += fabs(ap[i+0 ]-bp[i+0 ]);
2463 }
2464 }
2465 ;
2466 *((double*)(c)) = ac;
2467 }
2468 else
2469 #endif
2470 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2471 if( type == RSB_NUMERICAL_TYPE_FLOAT )
2472 {
2473 const float*ap = a,*bp = b;
2474 float ac = ((float)(0));
2475 {
2476 for(i=0;i+15<n;i+=16){
2477 ac += fabsf(ap[i+0 ]-bp[i+0 ]);
2478 ac += fabsf(ap[i+1 ]-bp[i+1 ]);
2479 ac += fabsf(ap[i+2 ]-bp[i+2 ]);
2480 ac += fabsf(ap[i+3 ]-bp[i+3 ]);
2481 ac += fabsf(ap[i+4 ]-bp[i+4 ]);
2482 ac += fabsf(ap[i+5 ]-bp[i+5 ]);
2483 ac += fabsf(ap[i+6 ]-bp[i+6 ]);
2484 ac += fabsf(ap[i+7 ]-bp[i+7 ]);
2485 ac += fabsf(ap[i+8 ]-bp[i+8 ]);
2486 ac += fabsf(ap[i+9 ]-bp[i+9 ]);
2487 ac += fabsf(ap[i+10 ]-bp[i+10 ]);
2488 ac += fabsf(ap[i+11 ]-bp[i+11 ]);
2489 ac += fabsf(ap[i+12 ]-bp[i+12 ]);
2490 ac += fabsf(ap[i+13 ]-bp[i+13 ]);
2491 ac += fabsf(ap[i+14 ]-bp[i+14 ]);
2492 ac += fabsf(ap[i+15 ]-bp[i+15 ]);
2493 }
2494 for( ;i<n;++i){ ac += fabsf(ap[i+0 ]-bp[i+0 ]);
2495 }
2496 }
2497 ;
2498 *((float*)(c)) = ac;
2499 }
2500 else
2501 #endif
2502 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2503 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
2504 {
2505 const float complex*ap = a,*bp = b;
2506 float complex ac = ((float complex)(0));
2507 {
2508 for(i=0;i+15<n;i+=16){
2509 ac += cabsf(ap[i+0 ]-bp[i+0 ]);
2510 ac += cabsf(ap[i+1 ]-bp[i+1 ]);
2511 ac += cabsf(ap[i+2 ]-bp[i+2 ]);
2512 ac += cabsf(ap[i+3 ]-bp[i+3 ]);
2513 ac += cabsf(ap[i+4 ]-bp[i+4 ]);
2514 ac += cabsf(ap[i+5 ]-bp[i+5 ]);
2515 ac += cabsf(ap[i+6 ]-bp[i+6 ]);
2516 ac += cabsf(ap[i+7 ]-bp[i+7 ]);
2517 ac += cabsf(ap[i+8 ]-bp[i+8 ]);
2518 ac += cabsf(ap[i+9 ]-bp[i+9 ]);
2519 ac += cabsf(ap[i+10 ]-bp[i+10 ]);
2520 ac += cabsf(ap[i+11 ]-bp[i+11 ]);
2521 ac += cabsf(ap[i+12 ]-bp[i+12 ]);
2522 ac += cabsf(ap[i+13 ]-bp[i+13 ]);
2523 ac += cabsf(ap[i+14 ]-bp[i+14 ]);
2524 ac += cabsf(ap[i+15 ]-bp[i+15 ]);
2525 }
2526 for( ;i<n;++i){ ac += cabsf(ap[i+0 ]-bp[i+0 ]);
2527 }
2528 }
2529 ;
2530 *((float complex*)(c)) = ac;
2531 }
2532 else
2533 #endif
2534 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2535 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
2536 {
2537 const double complex*ap = a,*bp = b;
2538 double complex ac = ((double complex)(0));
2539 {
2540 for(i=0;i+15<n;i+=16){
2541 ac += cabs(ap[i+0 ]-bp[i+0 ]);
2542 ac += cabs(ap[i+1 ]-bp[i+1 ]);
2543 ac += cabs(ap[i+2 ]-bp[i+2 ]);
2544 ac += cabs(ap[i+3 ]-bp[i+3 ]);
2545 ac += cabs(ap[i+4 ]-bp[i+4 ]);
2546 ac += cabs(ap[i+5 ]-bp[i+5 ]);
2547 ac += cabs(ap[i+6 ]-bp[i+6 ]);
2548 ac += cabs(ap[i+7 ]-bp[i+7 ]);
2549 ac += cabs(ap[i+8 ]-bp[i+8 ]);
2550 ac += cabs(ap[i+9 ]-bp[i+9 ]);
2551 ac += cabs(ap[i+10 ]-bp[i+10 ]);
2552 ac += cabs(ap[i+11 ]-bp[i+11 ]);
2553 ac += cabs(ap[i+12 ]-bp[i+12 ]);
2554 ac += cabs(ap[i+13 ]-bp[i+13 ]);
2555 ac += cabs(ap[i+14 ]-bp[i+14 ]);
2556 ac += cabs(ap[i+15 ]-bp[i+15 ]);
2557 }
2558 for( ;i<n;++i){ ac += cabs(ap[i+0 ]-bp[i+0 ]);
2559 }
2560 }
2561 ;
2562 *((double complex*)(c)) = ac;
2563 }
2564 else
2565 #endif
2566 return RSB_ERR_UNSUPPORTED_TYPE ;
2567 return RSB_ERR_NO_ERROR;
2568 }
2569
rsb__vector_sum_of_abs(void * c,const void * a,rsb_type_t type,size_t n)2570 rsb_err_t rsb__vector_sum_of_abs(void * c, const void * a, rsb_type_t type, size_t n)
2571 {
2572 /*!
2573 * \return \rsberrcodemsg
2574 * */
2575 size_t i;
2576 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2577 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
2578 {
2579 const double*ap = a;
2580 double ac = ((double)(0));
2581 {
2582 for(i=0;i+15<n;i+=16){
2583 ac += fabs(ap[i+0 ]);
2584 ac += fabs(ap[i+1 ]);
2585 ac += fabs(ap[i+2 ]);
2586 ac += fabs(ap[i+3 ]);
2587 ac += fabs(ap[i+4 ]);
2588 ac += fabs(ap[i+5 ]);
2589 ac += fabs(ap[i+6 ]);
2590 ac += fabs(ap[i+7 ]);
2591 ac += fabs(ap[i+8 ]);
2592 ac += fabs(ap[i+9 ]);
2593 ac += fabs(ap[i+10 ]);
2594 ac += fabs(ap[i+11 ]);
2595 ac += fabs(ap[i+12 ]);
2596 ac += fabs(ap[i+13 ]);
2597 ac += fabs(ap[i+14 ]);
2598 ac += fabs(ap[i+15 ]);
2599 }
2600 for( ;i<n;++i){ ac += fabs(ap[i+0 ]);
2601 }
2602 }
2603 ;
2604 *((double*)(c)) = ac;
2605 }
2606 else
2607 #endif
2608 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2609 if( type == RSB_NUMERICAL_TYPE_FLOAT )
2610 {
2611 const float*ap = a;
2612 float ac = ((float)(0));
2613 {
2614 for(i=0;i+15<n;i+=16){
2615 ac += fabsf(ap[i+0 ]);
2616 ac += fabsf(ap[i+1 ]);
2617 ac += fabsf(ap[i+2 ]);
2618 ac += fabsf(ap[i+3 ]);
2619 ac += fabsf(ap[i+4 ]);
2620 ac += fabsf(ap[i+5 ]);
2621 ac += fabsf(ap[i+6 ]);
2622 ac += fabsf(ap[i+7 ]);
2623 ac += fabsf(ap[i+8 ]);
2624 ac += fabsf(ap[i+9 ]);
2625 ac += fabsf(ap[i+10 ]);
2626 ac += fabsf(ap[i+11 ]);
2627 ac += fabsf(ap[i+12 ]);
2628 ac += fabsf(ap[i+13 ]);
2629 ac += fabsf(ap[i+14 ]);
2630 ac += fabsf(ap[i+15 ]);
2631 }
2632 for( ;i<n;++i){ ac += fabsf(ap[i+0 ]);
2633 }
2634 }
2635 ;
2636 *((float*)(c)) = ac;
2637 }
2638 else
2639 #endif
2640 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2641 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
2642 {
2643 const float complex*ap = a;
2644 float complex ac = ((float complex)(0));
2645 {
2646 for(i=0;i+15<n;i+=16){
2647 ac += cabsf(ap[i+0 ]);
2648 ac += cabsf(ap[i+1 ]);
2649 ac += cabsf(ap[i+2 ]);
2650 ac += cabsf(ap[i+3 ]);
2651 ac += cabsf(ap[i+4 ]);
2652 ac += cabsf(ap[i+5 ]);
2653 ac += cabsf(ap[i+6 ]);
2654 ac += cabsf(ap[i+7 ]);
2655 ac += cabsf(ap[i+8 ]);
2656 ac += cabsf(ap[i+9 ]);
2657 ac += cabsf(ap[i+10 ]);
2658 ac += cabsf(ap[i+11 ]);
2659 ac += cabsf(ap[i+12 ]);
2660 ac += cabsf(ap[i+13 ]);
2661 ac += cabsf(ap[i+14 ]);
2662 ac += cabsf(ap[i+15 ]);
2663 }
2664 for( ;i<n;++i){ ac += cabsf(ap[i+0 ]);
2665 }
2666 }
2667 ;
2668 *((float complex*)(c)) = ac;
2669 }
2670 else
2671 #endif
2672 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2673 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
2674 {
2675 const double complex*ap = a;
2676 double complex ac = ((double complex)(0));
2677 {
2678 for(i=0;i+15<n;i+=16){
2679 ac += cabs(ap[i+0 ]);
2680 ac += cabs(ap[i+1 ]);
2681 ac += cabs(ap[i+2 ]);
2682 ac += cabs(ap[i+3 ]);
2683 ac += cabs(ap[i+4 ]);
2684 ac += cabs(ap[i+5 ]);
2685 ac += cabs(ap[i+6 ]);
2686 ac += cabs(ap[i+7 ]);
2687 ac += cabs(ap[i+8 ]);
2688 ac += cabs(ap[i+9 ]);
2689 ac += cabs(ap[i+10 ]);
2690 ac += cabs(ap[i+11 ]);
2691 ac += cabs(ap[i+12 ]);
2692 ac += cabs(ap[i+13 ]);
2693 ac += cabs(ap[i+14 ]);
2694 ac += cabs(ap[i+15 ]);
2695 }
2696 for( ;i<n;++i){ ac += cabs(ap[i+0 ]);
2697 }
2698 }
2699 ;
2700 *((double complex*)(c)) = ac;
2701 }
2702 else
2703 #endif
2704 return RSB_ERR_UNSUPPORTED_TYPE ;
2705 return RSB_ERR_NO_ERROR;
2706 }
2707
rsb__vector_to_abs(void * a,rsb_type_t type,size_t n)2708 rsb_err_t rsb__vector_to_abs(void * a, rsb_type_t type, size_t n)
2709 {
2710 /*!
2711 * \return \rsberrcodemsg
2712 * */
2713 size_t i;
2714 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2715 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
2716 {double*ta = a;
2717 {
2718 for(i=0;i+15<n;i+=16){
2719 ta[i+0 ] = fabs(ta[i+0 ]);
2720 ta[i+1 ] = fabs(ta[i+1 ]);
2721 ta[i+2 ] = fabs(ta[i+2 ]);
2722 ta[i+3 ] = fabs(ta[i+3 ]);
2723 ta[i+4 ] = fabs(ta[i+4 ]);
2724 ta[i+5 ] = fabs(ta[i+5 ]);
2725 ta[i+6 ] = fabs(ta[i+6 ]);
2726 ta[i+7 ] = fabs(ta[i+7 ]);
2727 ta[i+8 ] = fabs(ta[i+8 ]);
2728 ta[i+9 ] = fabs(ta[i+9 ]);
2729 ta[i+10 ] = fabs(ta[i+10 ]);
2730 ta[i+11 ] = fabs(ta[i+11 ]);
2731 ta[i+12 ] = fabs(ta[i+12 ]);
2732 ta[i+13 ] = fabs(ta[i+13 ]);
2733 ta[i+14 ] = fabs(ta[i+14 ]);
2734 ta[i+15 ] = fabs(ta[i+15 ]);
2735 }
2736 for( ;i<n;++i){ ta[i+0 ] = fabs(ta[i+0 ]);
2737 }
2738 }
2739 ;
2740 }
2741 else
2742 #endif
2743 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2744 if( type == RSB_NUMERICAL_TYPE_FLOAT )
2745 {float*ta = a;
2746 {
2747 for(i=0;i+15<n;i+=16){
2748 ta[i+0 ] = fabsf(ta[i+0 ]);
2749 ta[i+1 ] = fabsf(ta[i+1 ]);
2750 ta[i+2 ] = fabsf(ta[i+2 ]);
2751 ta[i+3 ] = fabsf(ta[i+3 ]);
2752 ta[i+4 ] = fabsf(ta[i+4 ]);
2753 ta[i+5 ] = fabsf(ta[i+5 ]);
2754 ta[i+6 ] = fabsf(ta[i+6 ]);
2755 ta[i+7 ] = fabsf(ta[i+7 ]);
2756 ta[i+8 ] = fabsf(ta[i+8 ]);
2757 ta[i+9 ] = fabsf(ta[i+9 ]);
2758 ta[i+10 ] = fabsf(ta[i+10 ]);
2759 ta[i+11 ] = fabsf(ta[i+11 ]);
2760 ta[i+12 ] = fabsf(ta[i+12 ]);
2761 ta[i+13 ] = fabsf(ta[i+13 ]);
2762 ta[i+14 ] = fabsf(ta[i+14 ]);
2763 ta[i+15 ] = fabsf(ta[i+15 ]);
2764 }
2765 for( ;i<n;++i){ ta[i+0 ] = fabsf(ta[i+0 ]);
2766 }
2767 }
2768 ;
2769 }
2770 else
2771 #endif
2772 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2773 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
2774 {float complex*ta = a;
2775 {
2776 for(i=0;i+15<n;i+=16){
2777 ta[i+0 ] = cabsf(ta[i+0 ]);
2778 ta[i+1 ] = cabsf(ta[i+1 ]);
2779 ta[i+2 ] = cabsf(ta[i+2 ]);
2780 ta[i+3 ] = cabsf(ta[i+3 ]);
2781 ta[i+4 ] = cabsf(ta[i+4 ]);
2782 ta[i+5 ] = cabsf(ta[i+5 ]);
2783 ta[i+6 ] = cabsf(ta[i+6 ]);
2784 ta[i+7 ] = cabsf(ta[i+7 ]);
2785 ta[i+8 ] = cabsf(ta[i+8 ]);
2786 ta[i+9 ] = cabsf(ta[i+9 ]);
2787 ta[i+10 ] = cabsf(ta[i+10 ]);
2788 ta[i+11 ] = cabsf(ta[i+11 ]);
2789 ta[i+12 ] = cabsf(ta[i+12 ]);
2790 ta[i+13 ] = cabsf(ta[i+13 ]);
2791 ta[i+14 ] = cabsf(ta[i+14 ]);
2792 ta[i+15 ] = cabsf(ta[i+15 ]);
2793 }
2794 for( ;i<n;++i){ ta[i+0 ] = cabsf(ta[i+0 ]);
2795 }
2796 }
2797 ;
2798 }
2799 else
2800 #endif
2801 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2802 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
2803 {double complex*ta = a;
2804 {
2805 for(i=0;i+15<n;i+=16){
2806 ta[i+0 ] = cabs(ta[i+0 ]);
2807 ta[i+1 ] = cabs(ta[i+1 ]);
2808 ta[i+2 ] = cabs(ta[i+2 ]);
2809 ta[i+3 ] = cabs(ta[i+3 ]);
2810 ta[i+4 ] = cabs(ta[i+4 ]);
2811 ta[i+5 ] = cabs(ta[i+5 ]);
2812 ta[i+6 ] = cabs(ta[i+6 ]);
2813 ta[i+7 ] = cabs(ta[i+7 ]);
2814 ta[i+8 ] = cabs(ta[i+8 ]);
2815 ta[i+9 ] = cabs(ta[i+9 ]);
2816 ta[i+10 ] = cabs(ta[i+10 ]);
2817 ta[i+11 ] = cabs(ta[i+11 ]);
2818 ta[i+12 ] = cabs(ta[i+12 ]);
2819 ta[i+13 ] = cabs(ta[i+13 ]);
2820 ta[i+14 ] = cabs(ta[i+14 ]);
2821 ta[i+15 ] = cabs(ta[i+15 ]);
2822 }
2823 for( ;i<n;++i){ ta[i+0 ] = cabs(ta[i+0 ]);
2824 }
2825 }
2826 ;
2827 }
2828 else
2829 #endif
2830 return RSB_ERR_UNSUPPORTED_TYPE ;
2831 return RSB_ERR_NO_ERROR;
2832 }
2833
rsb_alpha_sum(void * a,const void * b,const void * alphap,rsb_type_t type,size_t n)2834 static rsb_err_t rsb_alpha_sum(void * a, const void * b, const void * alphap, rsb_type_t type, size_t n)
2835 {
2836 /*!
2837 * a <- a + alpha * b
2838 *
2839 * \param array an array pointer
2840 * \param type a valid type code
2841 * \param n the input array length
2842 * \note see daxpy in BLAS
2843 *
2844 * \return \rsberrcodemsg
2845 * */
2846 size_t i;
2847 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2848 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
2849 {
2850 const double alpha = alphap ? *(double*)alphap : ((double)(1.0));
2851 double*ta = a; const double*tb = b;
2852 {
2853 for(i=0;i+15<n;i+=16){
2854 ta[i+0 ]+=alpha*tb[i+0 ];
2855 ta[i+1 ]+=alpha*tb[i+1 ];
2856 ta[i+2 ]+=alpha*tb[i+2 ];
2857 ta[i+3 ]+=alpha*tb[i+3 ];
2858 ta[i+4 ]+=alpha*tb[i+4 ];
2859 ta[i+5 ]+=alpha*tb[i+5 ];
2860 ta[i+6 ]+=alpha*tb[i+6 ];
2861 ta[i+7 ]+=alpha*tb[i+7 ];
2862 ta[i+8 ]+=alpha*tb[i+8 ];
2863 ta[i+9 ]+=alpha*tb[i+9 ];
2864 ta[i+10 ]+=alpha*tb[i+10 ];
2865 ta[i+11 ]+=alpha*tb[i+11 ];
2866 ta[i+12 ]+=alpha*tb[i+12 ];
2867 ta[i+13 ]+=alpha*tb[i+13 ];
2868 ta[i+14 ]+=alpha*tb[i+14 ];
2869 ta[i+15 ]+=alpha*tb[i+15 ];
2870 }
2871 for( ;i<n;++i){ ta[i+0 ]+=alpha*tb[i+0 ];
2872 }
2873 }
2874 ;
2875 }
2876 else
2877 #endif
2878 #ifdef RSB_NUMERICAL_TYPE_FLOAT
2879 if( type == RSB_NUMERICAL_TYPE_FLOAT )
2880 {
2881 const float alpha = alphap ? *(float*)alphap : ((float)(1.0));
2882 float*ta = a; const float*tb = b;
2883 {
2884 for(i=0;i+15<n;i+=16){
2885 ta[i+0 ]+=alpha*tb[i+0 ];
2886 ta[i+1 ]+=alpha*tb[i+1 ];
2887 ta[i+2 ]+=alpha*tb[i+2 ];
2888 ta[i+3 ]+=alpha*tb[i+3 ];
2889 ta[i+4 ]+=alpha*tb[i+4 ];
2890 ta[i+5 ]+=alpha*tb[i+5 ];
2891 ta[i+6 ]+=alpha*tb[i+6 ];
2892 ta[i+7 ]+=alpha*tb[i+7 ];
2893 ta[i+8 ]+=alpha*tb[i+8 ];
2894 ta[i+9 ]+=alpha*tb[i+9 ];
2895 ta[i+10 ]+=alpha*tb[i+10 ];
2896 ta[i+11 ]+=alpha*tb[i+11 ];
2897 ta[i+12 ]+=alpha*tb[i+12 ];
2898 ta[i+13 ]+=alpha*tb[i+13 ];
2899 ta[i+14 ]+=alpha*tb[i+14 ];
2900 ta[i+15 ]+=alpha*tb[i+15 ];
2901 }
2902 for( ;i<n;++i){ ta[i+0 ]+=alpha*tb[i+0 ];
2903 }
2904 }
2905 ;
2906 }
2907 else
2908 #endif
2909 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
2910 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
2911 {
2912 const float complex alpha = alphap ? *(float complex*)alphap : ((float complex)(1.0));
2913 float complex*ta = a; const float complex*tb = b;
2914 {
2915 for(i=0;i+15<n;i+=16){
2916 ta[i+0 ]+=alpha*tb[i+0 ];
2917 ta[i+1 ]+=alpha*tb[i+1 ];
2918 ta[i+2 ]+=alpha*tb[i+2 ];
2919 ta[i+3 ]+=alpha*tb[i+3 ];
2920 ta[i+4 ]+=alpha*tb[i+4 ];
2921 ta[i+5 ]+=alpha*tb[i+5 ];
2922 ta[i+6 ]+=alpha*tb[i+6 ];
2923 ta[i+7 ]+=alpha*tb[i+7 ];
2924 ta[i+8 ]+=alpha*tb[i+8 ];
2925 ta[i+9 ]+=alpha*tb[i+9 ];
2926 ta[i+10 ]+=alpha*tb[i+10 ];
2927 ta[i+11 ]+=alpha*tb[i+11 ];
2928 ta[i+12 ]+=alpha*tb[i+12 ];
2929 ta[i+13 ]+=alpha*tb[i+13 ];
2930 ta[i+14 ]+=alpha*tb[i+14 ];
2931 ta[i+15 ]+=alpha*tb[i+15 ];
2932 }
2933 for( ;i<n;++i){ ta[i+0 ]+=alpha*tb[i+0 ];
2934 }
2935 }
2936 ;
2937 }
2938 else
2939 #endif
2940 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
2941 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
2942 {
2943 const double complex alpha = alphap ? *(double complex*)alphap : ((double complex)(1.0));
2944 double complex*ta = a; const double complex*tb = b;
2945 {
2946 for(i=0;i+15<n;i+=16){
2947 ta[i+0 ]+=alpha*tb[i+0 ];
2948 ta[i+1 ]+=alpha*tb[i+1 ];
2949 ta[i+2 ]+=alpha*tb[i+2 ];
2950 ta[i+3 ]+=alpha*tb[i+3 ];
2951 ta[i+4 ]+=alpha*tb[i+4 ];
2952 ta[i+5 ]+=alpha*tb[i+5 ];
2953 ta[i+6 ]+=alpha*tb[i+6 ];
2954 ta[i+7 ]+=alpha*tb[i+7 ];
2955 ta[i+8 ]+=alpha*tb[i+8 ];
2956 ta[i+9 ]+=alpha*tb[i+9 ];
2957 ta[i+10 ]+=alpha*tb[i+10 ];
2958 ta[i+11 ]+=alpha*tb[i+11 ];
2959 ta[i+12 ]+=alpha*tb[i+12 ];
2960 ta[i+13 ]+=alpha*tb[i+13 ];
2961 ta[i+14 ]+=alpha*tb[i+14 ];
2962 ta[i+15 ]+=alpha*tb[i+15 ];
2963 }
2964 for( ;i<n;++i){ ta[i+0 ]+=alpha*tb[i+0 ];
2965 }
2966 }
2967 ;
2968 }
2969 else
2970 #endif
2971 return RSB_ERR_UNSUPPORTED_TYPE ;
2972 return RSB_ERR_NO_ERROR;
2973 }
2974
2975
rsb__util_set_array_to_converted_integer(void * p,rsb_flags_t typecode,const rsb_nnz_idx_t n,const rsb_nnz_idx_t incp,const rsb_int v)2976 rsb_err_t rsb__util_set_array_to_converted_integer(void *p, rsb_flags_t typecode, const rsb_nnz_idx_t n, const rsb_nnz_idx_t incp, const rsb_int v)
2977 {
2978 /*!
2979 * */
2980 size_t i;
2981
2982 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
2983 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
2984 {
2985 double*tp = p; const double tv = (double)v;
2986 {
2987 for(i=0;i+15<n;i+=16){
2988 tp[((i+0 )*incp)] = tv;
2989 tp[((i+1 )*incp)] = tv;
2990 tp[((i+2 )*incp)] = tv;
2991 tp[((i+3 )*incp)] = tv;
2992 tp[((i+4 )*incp)] = tv;
2993 tp[((i+5 )*incp)] = tv;
2994 tp[((i+6 )*incp)] = tv;
2995 tp[((i+7 )*incp)] = tv;
2996 tp[((i+8 )*incp)] = tv;
2997 tp[((i+9 )*incp)] = tv;
2998 tp[((i+10 )*incp)] = tv;
2999 tp[((i+11 )*incp)] = tv;
3000 tp[((i+12 )*incp)] = tv;
3001 tp[((i+13 )*incp)] = tv;
3002 tp[((i+14 )*incp)] = tv;
3003 tp[((i+15 )*incp)] = tv;
3004 }
3005 for( ;i<n;++i){ tp[((i+0 )*incp)] = tv;
3006 }
3007 }
3008 ;
3009 }
3010 else
3011 #endif
3012 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3013 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
3014 {
3015 float*tp = p; const float tv = (float)v;
3016 {
3017 for(i=0;i+15<n;i+=16){
3018 tp[((i+0 )*incp)] = tv;
3019 tp[((i+1 )*incp)] = tv;
3020 tp[((i+2 )*incp)] = tv;
3021 tp[((i+3 )*incp)] = tv;
3022 tp[((i+4 )*incp)] = tv;
3023 tp[((i+5 )*incp)] = tv;
3024 tp[((i+6 )*incp)] = tv;
3025 tp[((i+7 )*incp)] = tv;
3026 tp[((i+8 )*incp)] = tv;
3027 tp[((i+9 )*incp)] = tv;
3028 tp[((i+10 )*incp)] = tv;
3029 tp[((i+11 )*incp)] = tv;
3030 tp[((i+12 )*incp)] = tv;
3031 tp[((i+13 )*incp)] = tv;
3032 tp[((i+14 )*incp)] = tv;
3033 tp[((i+15 )*incp)] = tv;
3034 }
3035 for( ;i<n;++i){ tp[((i+0 )*incp)] = tv;
3036 }
3037 }
3038 ;
3039 }
3040 else
3041 #endif
3042 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3043 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
3044 {
3045 float complex*tp = p; const float complex tv = (float complex)v;
3046 {
3047 for(i=0;i+15<n;i+=16){
3048 tp[((i+0 )*incp)] = tv;
3049 tp[((i+1 )*incp)] = tv;
3050 tp[((i+2 )*incp)] = tv;
3051 tp[((i+3 )*incp)] = tv;
3052 tp[((i+4 )*incp)] = tv;
3053 tp[((i+5 )*incp)] = tv;
3054 tp[((i+6 )*incp)] = tv;
3055 tp[((i+7 )*incp)] = tv;
3056 tp[((i+8 )*incp)] = tv;
3057 tp[((i+9 )*incp)] = tv;
3058 tp[((i+10 )*incp)] = tv;
3059 tp[((i+11 )*incp)] = tv;
3060 tp[((i+12 )*incp)] = tv;
3061 tp[((i+13 )*incp)] = tv;
3062 tp[((i+14 )*incp)] = tv;
3063 tp[((i+15 )*incp)] = tv;
3064 }
3065 for( ;i<n;++i){ tp[((i+0 )*incp)] = tv;
3066 }
3067 }
3068 ;
3069 }
3070 else
3071 #endif
3072 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3073 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
3074 {
3075 double complex*tp = p; const double complex tv = (double complex)v;
3076 {
3077 for(i=0;i+15<n;i+=16){
3078 tp[((i+0 )*incp)] = tv;
3079 tp[((i+1 )*incp)] = tv;
3080 tp[((i+2 )*incp)] = tv;
3081 tp[((i+3 )*incp)] = tv;
3082 tp[((i+4 )*incp)] = tv;
3083 tp[((i+5 )*incp)] = tv;
3084 tp[((i+6 )*incp)] = tv;
3085 tp[((i+7 )*incp)] = tv;
3086 tp[((i+8 )*incp)] = tv;
3087 tp[((i+9 )*incp)] = tv;
3088 tp[((i+10 )*incp)] = tv;
3089 tp[((i+11 )*incp)] = tv;
3090 tp[((i+12 )*incp)] = tv;
3091 tp[((i+13 )*incp)] = tv;
3092 tp[((i+14 )*incp)] = tv;
3093 tp[((i+15 )*incp)] = tv;
3094 }
3095 for( ;i<n;++i){ tp[((i+0 )*incp)] = tv;
3096 }
3097 }
3098 ;
3099 }
3100 else
3101 #endif
3102 return RSB_ERR_UNSUPPORTED_TYPE ;
3103 return RSB_ERR_NO_ERROR;
3104 }
3105
rsb__vectors_left_sum_reduce_and_zero(void * d,void * s,const rsb_type_t typecode,const size_t n,const size_t incd,const size_t off)3106 rsb_err_t rsb__vectors_left_sum_reduce_and_zero(void * d, void * s, const rsb_type_t typecode, const size_t n, const size_t incd, const size_t off)
3107 {
3108 /*!
3109 * d[off:off+n-1] <- s[off:off+n-1]
3110 * s[off:off+n-1] <- 0
3111 *
3112 * \param array an array pointer
3113 * \param typecode a valid type code
3114 * \param incd the stride of d
3115 * \param off offset in the vectors
3116 * \return \rsberrcodemsg
3117 * */
3118 size_t i;
3119
3120 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3121 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
3122 {
3123 double*td = d,*ts = s;
3124 {
3125 for(i=0;i+15<n;i+=16){
3126 td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3127 ts[(off+i+0 )] = ((double)(0));
3128 td[(off+i+1 )*incd]+=ts[(off+i+1 )];
3129 ts[(off+i+1 )] = ((double)(0));
3130 td[(off+i+2 )*incd]+=ts[(off+i+2 )];
3131 ts[(off+i+2 )] = ((double)(0));
3132 td[(off+i+3 )*incd]+=ts[(off+i+3 )];
3133 ts[(off+i+3 )] = ((double)(0));
3134 td[(off+i+4 )*incd]+=ts[(off+i+4 )];
3135 ts[(off+i+4 )] = ((double)(0));
3136 td[(off+i+5 )*incd]+=ts[(off+i+5 )];
3137 ts[(off+i+5 )] = ((double)(0));
3138 td[(off+i+6 )*incd]+=ts[(off+i+6 )];
3139 ts[(off+i+6 )] = ((double)(0));
3140 td[(off+i+7 )*incd]+=ts[(off+i+7 )];
3141 ts[(off+i+7 )] = ((double)(0));
3142 td[(off+i+8 )*incd]+=ts[(off+i+8 )];
3143 ts[(off+i+8 )] = ((double)(0));
3144 td[(off+i+9 )*incd]+=ts[(off+i+9 )];
3145 ts[(off+i+9 )] = ((double)(0));
3146 td[(off+i+10 )*incd]+=ts[(off+i+10 )];
3147 ts[(off+i+10 )] = ((double)(0));
3148 td[(off+i+11 )*incd]+=ts[(off+i+11 )];
3149 ts[(off+i+11 )] = ((double)(0));
3150 td[(off+i+12 )*incd]+=ts[(off+i+12 )];
3151 ts[(off+i+12 )] = ((double)(0));
3152 td[(off+i+13 )*incd]+=ts[(off+i+13 )];
3153 ts[(off+i+13 )] = ((double)(0));
3154 td[(off+i+14 )*incd]+=ts[(off+i+14 )];
3155 ts[(off+i+14 )] = ((double)(0));
3156 td[(off+i+15 )*incd]+=ts[(off+i+15 )];
3157 ts[(off+i+15 )] = ((double)(0));
3158 }
3159 for( ;i<n;++i){ td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3160 ts[(off+i+0 )] = ((double)(0));
3161 }
3162 }
3163 ;
3164 }
3165 else
3166 #endif
3167 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3168 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
3169 {
3170 float*td = d,*ts = s;
3171 {
3172 for(i=0;i+15<n;i+=16){
3173 td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3174 ts[(off+i+0 )] = ((float)(0));
3175 td[(off+i+1 )*incd]+=ts[(off+i+1 )];
3176 ts[(off+i+1 )] = ((float)(0));
3177 td[(off+i+2 )*incd]+=ts[(off+i+2 )];
3178 ts[(off+i+2 )] = ((float)(0));
3179 td[(off+i+3 )*incd]+=ts[(off+i+3 )];
3180 ts[(off+i+3 )] = ((float)(0));
3181 td[(off+i+4 )*incd]+=ts[(off+i+4 )];
3182 ts[(off+i+4 )] = ((float)(0));
3183 td[(off+i+5 )*incd]+=ts[(off+i+5 )];
3184 ts[(off+i+5 )] = ((float)(0));
3185 td[(off+i+6 )*incd]+=ts[(off+i+6 )];
3186 ts[(off+i+6 )] = ((float)(0));
3187 td[(off+i+7 )*incd]+=ts[(off+i+7 )];
3188 ts[(off+i+7 )] = ((float)(0));
3189 td[(off+i+8 )*incd]+=ts[(off+i+8 )];
3190 ts[(off+i+8 )] = ((float)(0));
3191 td[(off+i+9 )*incd]+=ts[(off+i+9 )];
3192 ts[(off+i+9 )] = ((float)(0));
3193 td[(off+i+10 )*incd]+=ts[(off+i+10 )];
3194 ts[(off+i+10 )] = ((float)(0));
3195 td[(off+i+11 )*incd]+=ts[(off+i+11 )];
3196 ts[(off+i+11 )] = ((float)(0));
3197 td[(off+i+12 )*incd]+=ts[(off+i+12 )];
3198 ts[(off+i+12 )] = ((float)(0));
3199 td[(off+i+13 )*incd]+=ts[(off+i+13 )];
3200 ts[(off+i+13 )] = ((float)(0));
3201 td[(off+i+14 )*incd]+=ts[(off+i+14 )];
3202 ts[(off+i+14 )] = ((float)(0));
3203 td[(off+i+15 )*incd]+=ts[(off+i+15 )];
3204 ts[(off+i+15 )] = ((float)(0));
3205 }
3206 for( ;i<n;++i){ td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3207 ts[(off+i+0 )] = ((float)(0));
3208 }
3209 }
3210 ;
3211 }
3212 else
3213 #endif
3214 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3215 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
3216 {
3217 float complex*td = d,*ts = s;
3218 {
3219 for(i=0;i+15<n;i+=16){
3220 td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3221 ts[(off+i+0 )] = ((float complex)(0));
3222 td[(off+i+1 )*incd]+=ts[(off+i+1 )];
3223 ts[(off+i+1 )] = ((float complex)(0));
3224 td[(off+i+2 )*incd]+=ts[(off+i+2 )];
3225 ts[(off+i+2 )] = ((float complex)(0));
3226 td[(off+i+3 )*incd]+=ts[(off+i+3 )];
3227 ts[(off+i+3 )] = ((float complex)(0));
3228 td[(off+i+4 )*incd]+=ts[(off+i+4 )];
3229 ts[(off+i+4 )] = ((float complex)(0));
3230 td[(off+i+5 )*incd]+=ts[(off+i+5 )];
3231 ts[(off+i+5 )] = ((float complex)(0));
3232 td[(off+i+6 )*incd]+=ts[(off+i+6 )];
3233 ts[(off+i+6 )] = ((float complex)(0));
3234 td[(off+i+7 )*incd]+=ts[(off+i+7 )];
3235 ts[(off+i+7 )] = ((float complex)(0));
3236 td[(off+i+8 )*incd]+=ts[(off+i+8 )];
3237 ts[(off+i+8 )] = ((float complex)(0));
3238 td[(off+i+9 )*incd]+=ts[(off+i+9 )];
3239 ts[(off+i+9 )] = ((float complex)(0));
3240 td[(off+i+10 )*incd]+=ts[(off+i+10 )];
3241 ts[(off+i+10 )] = ((float complex)(0));
3242 td[(off+i+11 )*incd]+=ts[(off+i+11 )];
3243 ts[(off+i+11 )] = ((float complex)(0));
3244 td[(off+i+12 )*incd]+=ts[(off+i+12 )];
3245 ts[(off+i+12 )] = ((float complex)(0));
3246 td[(off+i+13 )*incd]+=ts[(off+i+13 )];
3247 ts[(off+i+13 )] = ((float complex)(0));
3248 td[(off+i+14 )*incd]+=ts[(off+i+14 )];
3249 ts[(off+i+14 )] = ((float complex)(0));
3250 td[(off+i+15 )*incd]+=ts[(off+i+15 )];
3251 ts[(off+i+15 )] = ((float complex)(0));
3252 }
3253 for( ;i<n;++i){ td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3254 ts[(off+i+0 )] = ((float complex)(0));
3255 }
3256 }
3257 ;
3258 }
3259 else
3260 #endif
3261 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3262 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
3263 {
3264 double complex*td = d,*ts = s;
3265 {
3266 for(i=0;i+15<n;i+=16){
3267 td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3268 ts[(off+i+0 )] = ((double complex)(0));
3269 td[(off+i+1 )*incd]+=ts[(off+i+1 )];
3270 ts[(off+i+1 )] = ((double complex)(0));
3271 td[(off+i+2 )*incd]+=ts[(off+i+2 )];
3272 ts[(off+i+2 )] = ((double complex)(0));
3273 td[(off+i+3 )*incd]+=ts[(off+i+3 )];
3274 ts[(off+i+3 )] = ((double complex)(0));
3275 td[(off+i+4 )*incd]+=ts[(off+i+4 )];
3276 ts[(off+i+4 )] = ((double complex)(0));
3277 td[(off+i+5 )*incd]+=ts[(off+i+5 )];
3278 ts[(off+i+5 )] = ((double complex)(0));
3279 td[(off+i+6 )*incd]+=ts[(off+i+6 )];
3280 ts[(off+i+6 )] = ((double complex)(0));
3281 td[(off+i+7 )*incd]+=ts[(off+i+7 )];
3282 ts[(off+i+7 )] = ((double complex)(0));
3283 td[(off+i+8 )*incd]+=ts[(off+i+8 )];
3284 ts[(off+i+8 )] = ((double complex)(0));
3285 td[(off+i+9 )*incd]+=ts[(off+i+9 )];
3286 ts[(off+i+9 )] = ((double complex)(0));
3287 td[(off+i+10 )*incd]+=ts[(off+i+10 )];
3288 ts[(off+i+10 )] = ((double complex)(0));
3289 td[(off+i+11 )*incd]+=ts[(off+i+11 )];
3290 ts[(off+i+11 )] = ((double complex)(0));
3291 td[(off+i+12 )*incd]+=ts[(off+i+12 )];
3292 ts[(off+i+12 )] = ((double complex)(0));
3293 td[(off+i+13 )*incd]+=ts[(off+i+13 )];
3294 ts[(off+i+13 )] = ((double complex)(0));
3295 td[(off+i+14 )*incd]+=ts[(off+i+14 )];
3296 ts[(off+i+14 )] = ((double complex)(0));
3297 td[(off+i+15 )*incd]+=ts[(off+i+15 )];
3298 ts[(off+i+15 )] = ((double complex)(0));
3299 }
3300 for( ;i<n;++i){ td[(off+i+0 )*incd]+=ts[(off+i+0 )];
3301 ts[(off+i+0 )] = ((double complex)(0));
3302 }
3303 }
3304 ;
3305 }
3306 else
3307 #endif
3308 return RSB_ERR_UNSUPPORTED_TYPE ;
3309 return RSB_ERR_NO_ERROR;
3310 }
3311
3312
rsb_alpha_sum_strided(void * a,const void * b,const void * alphap,rsb_type_t type,size_t n,int inca,int incb)3313 static rsb_err_t rsb_alpha_sum_strided(void * a, const void * b, const void * alphap, rsb_type_t type, size_t n, int inca, int incb)
3314 {
3315 /*!
3316 * a <- a + alpha * b
3317 *
3318 * \param array an array pointer
3319 * \param type a valid type code
3320 * \param n the input array length
3321 * \note see daxpy in BLAS
3322 *
3323 * \return \rsberrcodemsg
3324 * */
3325 size_t i;
3326 if(inca == 1 && incb == 1)
3327 return rsb_alpha_sum(a,b,alphap,type,n);
3328 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3329 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
3330 {
3331 const double alpha = alphap ? *(double*)alphap : ((double)(1.0));
3332 double*ta = a; const double*tb = b;
3333 {
3334 for(i=0;i+15<n;i+=16){
3335 ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3336 ta[inca*(i+1 )]+=alpha*tb[incb*(i+1 )];
3337 ta[inca*(i+2 )]+=alpha*tb[incb*(i+2 )];
3338 ta[inca*(i+3 )]+=alpha*tb[incb*(i+3 )];
3339 ta[inca*(i+4 )]+=alpha*tb[incb*(i+4 )];
3340 ta[inca*(i+5 )]+=alpha*tb[incb*(i+5 )];
3341 ta[inca*(i+6 )]+=alpha*tb[incb*(i+6 )];
3342 ta[inca*(i+7 )]+=alpha*tb[incb*(i+7 )];
3343 ta[inca*(i+8 )]+=alpha*tb[incb*(i+8 )];
3344 ta[inca*(i+9 )]+=alpha*tb[incb*(i+9 )];
3345 ta[inca*(i+10 )]+=alpha*tb[incb*(i+10 )];
3346 ta[inca*(i+11 )]+=alpha*tb[incb*(i+11 )];
3347 ta[inca*(i+12 )]+=alpha*tb[incb*(i+12 )];
3348 ta[inca*(i+13 )]+=alpha*tb[incb*(i+13 )];
3349 ta[inca*(i+14 )]+=alpha*tb[incb*(i+14 )];
3350 ta[inca*(i+15 )]+=alpha*tb[incb*(i+15 )];
3351 }
3352 for( ;i<n;++i){ ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3353 }
3354 }
3355 ;
3356 }
3357 else
3358 #endif
3359 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3360 if( type == RSB_NUMERICAL_TYPE_FLOAT )
3361 {
3362 const float alpha = alphap ? *(float*)alphap : ((float)(1.0));
3363 float*ta = a; const float*tb = b;
3364 {
3365 for(i=0;i+15<n;i+=16){
3366 ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3367 ta[inca*(i+1 )]+=alpha*tb[incb*(i+1 )];
3368 ta[inca*(i+2 )]+=alpha*tb[incb*(i+2 )];
3369 ta[inca*(i+3 )]+=alpha*tb[incb*(i+3 )];
3370 ta[inca*(i+4 )]+=alpha*tb[incb*(i+4 )];
3371 ta[inca*(i+5 )]+=alpha*tb[incb*(i+5 )];
3372 ta[inca*(i+6 )]+=alpha*tb[incb*(i+6 )];
3373 ta[inca*(i+7 )]+=alpha*tb[incb*(i+7 )];
3374 ta[inca*(i+8 )]+=alpha*tb[incb*(i+8 )];
3375 ta[inca*(i+9 )]+=alpha*tb[incb*(i+9 )];
3376 ta[inca*(i+10 )]+=alpha*tb[incb*(i+10 )];
3377 ta[inca*(i+11 )]+=alpha*tb[incb*(i+11 )];
3378 ta[inca*(i+12 )]+=alpha*tb[incb*(i+12 )];
3379 ta[inca*(i+13 )]+=alpha*tb[incb*(i+13 )];
3380 ta[inca*(i+14 )]+=alpha*tb[incb*(i+14 )];
3381 ta[inca*(i+15 )]+=alpha*tb[incb*(i+15 )];
3382 }
3383 for( ;i<n;++i){ ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3384 }
3385 }
3386 ;
3387 }
3388 else
3389 #endif
3390 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3391 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
3392 {
3393 const float complex alpha = alphap ? *(float complex*)alphap : ((float complex)(1.0));
3394 float complex*ta = a; const float complex*tb = b;
3395 {
3396 for(i=0;i+15<n;i+=16){
3397 ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3398 ta[inca*(i+1 )]+=alpha*tb[incb*(i+1 )];
3399 ta[inca*(i+2 )]+=alpha*tb[incb*(i+2 )];
3400 ta[inca*(i+3 )]+=alpha*tb[incb*(i+3 )];
3401 ta[inca*(i+4 )]+=alpha*tb[incb*(i+4 )];
3402 ta[inca*(i+5 )]+=alpha*tb[incb*(i+5 )];
3403 ta[inca*(i+6 )]+=alpha*tb[incb*(i+6 )];
3404 ta[inca*(i+7 )]+=alpha*tb[incb*(i+7 )];
3405 ta[inca*(i+8 )]+=alpha*tb[incb*(i+8 )];
3406 ta[inca*(i+9 )]+=alpha*tb[incb*(i+9 )];
3407 ta[inca*(i+10 )]+=alpha*tb[incb*(i+10 )];
3408 ta[inca*(i+11 )]+=alpha*tb[incb*(i+11 )];
3409 ta[inca*(i+12 )]+=alpha*tb[incb*(i+12 )];
3410 ta[inca*(i+13 )]+=alpha*tb[incb*(i+13 )];
3411 ta[inca*(i+14 )]+=alpha*tb[incb*(i+14 )];
3412 ta[inca*(i+15 )]+=alpha*tb[incb*(i+15 )];
3413 }
3414 for( ;i<n;++i){ ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3415 }
3416 }
3417 ;
3418 }
3419 else
3420 #endif
3421 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3422 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
3423 {
3424 const double complex alpha = alphap ? *(double complex*)alphap : ((double complex)(1.0));
3425 double complex*ta = a; const double complex*tb = b;
3426 {
3427 for(i=0;i+15<n;i+=16){
3428 ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3429 ta[inca*(i+1 )]+=alpha*tb[incb*(i+1 )];
3430 ta[inca*(i+2 )]+=alpha*tb[incb*(i+2 )];
3431 ta[inca*(i+3 )]+=alpha*tb[incb*(i+3 )];
3432 ta[inca*(i+4 )]+=alpha*tb[incb*(i+4 )];
3433 ta[inca*(i+5 )]+=alpha*tb[incb*(i+5 )];
3434 ta[inca*(i+6 )]+=alpha*tb[incb*(i+6 )];
3435 ta[inca*(i+7 )]+=alpha*tb[incb*(i+7 )];
3436 ta[inca*(i+8 )]+=alpha*tb[incb*(i+8 )];
3437 ta[inca*(i+9 )]+=alpha*tb[incb*(i+9 )];
3438 ta[inca*(i+10 )]+=alpha*tb[incb*(i+10 )];
3439 ta[inca*(i+11 )]+=alpha*tb[incb*(i+11 )];
3440 ta[inca*(i+12 )]+=alpha*tb[incb*(i+12 )];
3441 ta[inca*(i+13 )]+=alpha*tb[incb*(i+13 )];
3442 ta[inca*(i+14 )]+=alpha*tb[incb*(i+14 )];
3443 ta[inca*(i+15 )]+=alpha*tb[incb*(i+15 )];
3444 }
3445 for( ;i<n;++i){ ta[inca*(i+0 )]+=alpha*tb[incb*(i+0 )];
3446 }
3447 }
3448 ;
3449 }
3450 else
3451 #endif
3452 return RSB_ERR_UNSUPPORTED_TYPE ;
3453 return RSB_ERR_NO_ERROR;
3454 }
3455
rsb__cblas_Xaxpy(rsb_type_t type,size_t n,const void * alphap,const void * x,const int incx,void * y,const int incy)3456 rsb_err_t rsb__cblas_Xaxpy(rsb_type_t type, size_t n, const void * alphap, const void * x, const int incx, void * y, const int incy)
3457 {
3458 /*!
3459 * y <- y + alpha * x
3460 */
3461 return rsb_alpha_sum_strided(y,x,alphap,type,n,incy,incx);
3462 }
3463
rsb__vector_mult(const void * a,const void * b,void * c,rsb_type_t type,size_t n)3464 rsb_err_t rsb__vector_mult(const void * a, const void * b, void * c, rsb_type_t type, size_t n)
3465 {
3466 /*!
3467 * c <- a*b
3468 * It is allowed to give c == a or c == b or a == b
3469 *
3470 * \param array an array pointer
3471 * \param type a valid type code
3472 * \param n the input array length
3473 *
3474 * FIXME : useless ?
3475 *
3476 * \return \rsberrcodemsg
3477 * */
3478 size_t i;
3479 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3480 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
3481 {
3482 const double*ta = a; const double*tb = b; double*tc = c;
3483 {
3484 for(i=0;i+15<n;i+=16){
3485 tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3486 tc[i+1 ] = ta[i+1 ]*tb[i+1 ];
3487 tc[i+2 ] = ta[i+2 ]*tb[i+2 ];
3488 tc[i+3 ] = ta[i+3 ]*tb[i+3 ];
3489 tc[i+4 ] = ta[i+4 ]*tb[i+4 ];
3490 tc[i+5 ] = ta[i+5 ]*tb[i+5 ];
3491 tc[i+6 ] = ta[i+6 ]*tb[i+6 ];
3492 tc[i+7 ] = ta[i+7 ]*tb[i+7 ];
3493 tc[i+8 ] = ta[i+8 ]*tb[i+8 ];
3494 tc[i+9 ] = ta[i+9 ]*tb[i+9 ];
3495 tc[i+10 ] = ta[i+10 ]*tb[i+10 ];
3496 tc[i+11 ] = ta[i+11 ]*tb[i+11 ];
3497 tc[i+12 ] = ta[i+12 ]*tb[i+12 ];
3498 tc[i+13 ] = ta[i+13 ]*tb[i+13 ];
3499 tc[i+14 ] = ta[i+14 ]*tb[i+14 ];
3500 tc[i+15 ] = ta[i+15 ]*tb[i+15 ];
3501 }
3502 for( ;i<n;++i){ tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3503 }
3504 }
3505 ;
3506 }
3507 else
3508 #endif
3509 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3510 if( type == RSB_NUMERICAL_TYPE_FLOAT )
3511 {
3512 const float*ta = a; const float*tb = b; float*tc = c;
3513 {
3514 for(i=0;i+15<n;i+=16){
3515 tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3516 tc[i+1 ] = ta[i+1 ]*tb[i+1 ];
3517 tc[i+2 ] = ta[i+2 ]*tb[i+2 ];
3518 tc[i+3 ] = ta[i+3 ]*tb[i+3 ];
3519 tc[i+4 ] = ta[i+4 ]*tb[i+4 ];
3520 tc[i+5 ] = ta[i+5 ]*tb[i+5 ];
3521 tc[i+6 ] = ta[i+6 ]*tb[i+6 ];
3522 tc[i+7 ] = ta[i+7 ]*tb[i+7 ];
3523 tc[i+8 ] = ta[i+8 ]*tb[i+8 ];
3524 tc[i+9 ] = ta[i+9 ]*tb[i+9 ];
3525 tc[i+10 ] = ta[i+10 ]*tb[i+10 ];
3526 tc[i+11 ] = ta[i+11 ]*tb[i+11 ];
3527 tc[i+12 ] = ta[i+12 ]*tb[i+12 ];
3528 tc[i+13 ] = ta[i+13 ]*tb[i+13 ];
3529 tc[i+14 ] = ta[i+14 ]*tb[i+14 ];
3530 tc[i+15 ] = ta[i+15 ]*tb[i+15 ];
3531 }
3532 for( ;i<n;++i){ tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3533 }
3534 }
3535 ;
3536 }
3537 else
3538 #endif
3539 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3540 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
3541 {
3542 const float complex*ta = a; const float complex*tb = b; float complex*tc = c;
3543 {
3544 for(i=0;i+15<n;i+=16){
3545 tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3546 tc[i+1 ] = ta[i+1 ]*tb[i+1 ];
3547 tc[i+2 ] = ta[i+2 ]*tb[i+2 ];
3548 tc[i+3 ] = ta[i+3 ]*tb[i+3 ];
3549 tc[i+4 ] = ta[i+4 ]*tb[i+4 ];
3550 tc[i+5 ] = ta[i+5 ]*tb[i+5 ];
3551 tc[i+6 ] = ta[i+6 ]*tb[i+6 ];
3552 tc[i+7 ] = ta[i+7 ]*tb[i+7 ];
3553 tc[i+8 ] = ta[i+8 ]*tb[i+8 ];
3554 tc[i+9 ] = ta[i+9 ]*tb[i+9 ];
3555 tc[i+10 ] = ta[i+10 ]*tb[i+10 ];
3556 tc[i+11 ] = ta[i+11 ]*tb[i+11 ];
3557 tc[i+12 ] = ta[i+12 ]*tb[i+12 ];
3558 tc[i+13 ] = ta[i+13 ]*tb[i+13 ];
3559 tc[i+14 ] = ta[i+14 ]*tb[i+14 ];
3560 tc[i+15 ] = ta[i+15 ]*tb[i+15 ];
3561 }
3562 for( ;i<n;++i){ tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3563 }
3564 }
3565 ;
3566 }
3567 else
3568 #endif
3569 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3570 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
3571 {
3572 const double complex*ta = a; const double complex*tb = b; double complex*tc = c;
3573 {
3574 for(i=0;i+15<n;i+=16){
3575 tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3576 tc[i+1 ] = ta[i+1 ]*tb[i+1 ];
3577 tc[i+2 ] = ta[i+2 ]*tb[i+2 ];
3578 tc[i+3 ] = ta[i+3 ]*tb[i+3 ];
3579 tc[i+4 ] = ta[i+4 ]*tb[i+4 ];
3580 tc[i+5 ] = ta[i+5 ]*tb[i+5 ];
3581 tc[i+6 ] = ta[i+6 ]*tb[i+6 ];
3582 tc[i+7 ] = ta[i+7 ]*tb[i+7 ];
3583 tc[i+8 ] = ta[i+8 ]*tb[i+8 ];
3584 tc[i+9 ] = ta[i+9 ]*tb[i+9 ];
3585 tc[i+10 ] = ta[i+10 ]*tb[i+10 ];
3586 tc[i+11 ] = ta[i+11 ]*tb[i+11 ];
3587 tc[i+12 ] = ta[i+12 ]*tb[i+12 ];
3588 tc[i+13 ] = ta[i+13 ]*tb[i+13 ];
3589 tc[i+14 ] = ta[i+14 ]*tb[i+14 ];
3590 tc[i+15 ] = ta[i+15 ]*tb[i+15 ];
3591 }
3592 for( ;i<n;++i){ tc[i+0 ] = ta[i+0 ]*tb[i+0 ];
3593 }
3594 }
3595 ;
3596 }
3597 else
3598 #endif
3599 return RSB_ERR_UNSUPPORTED_TYPE ;
3600 return RSB_ERR_NO_ERROR;
3601 }
3602
rsb__xcopy(void * a,const void * b,rsb_nnz_idx_t toi,rsb_nnz_idx_t foi,rsb_nnz_idx_t n,size_t el_size)3603 rsb_err_t rsb__xcopy(void * a, const void * b, rsb_nnz_idx_t toi, rsb_nnz_idx_t foi, rsb_nnz_idx_t n,size_t el_size)
3604 {
3605 /*!
3606 * a[toi:toi+n] <- b[foi:foi+n]
3607 *
3608 * \param array an array pointer
3609 * \param type a valid type code
3610 *
3611 * \return \rsberrcodemsg
3612 * */
3613 rsb__memcpy(((rsb_byte_t*)a)+el_size*toi,((const rsb_byte_t*)b)+el_size*foi,el_size*n);
3614 return RSB_ERR_NO_ERROR;
3615 }
3616
rsb__do_are_same(const void * ap,const void * bp,rsb_nnz_idx_t n,rsb_type_t typecode,rsb_nnz_idx_t incx,rsb_nnz_idx_t incy)3617 rsb_err_t rsb__do_are_same(const void * ap, const void * bp, rsb_nnz_idx_t n,rsb_type_t typecode, rsb_nnz_idx_t incx, rsb_nnz_idx_t incy)
3618 {
3619 /*!
3620 *
3621 * \param array an array pointer
3622 * \param type a valid type code
3623 *
3624 * \return \rsberrcodemsg
3625 *
3626 * For cases like 1+0I differing from 1-0I ..
3627 * */
3628 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3629 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
3630 {
3631 rsb_nnz_idx_t i;
3632 const double *a = ap; const double *b = bp;
3633
3634 {
3635 for(i=0;i+15<n;i+=16){
3636 if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3637 if( a[incx*(i+1 )]!=b[incy*(i+1 )]) goto differing;
3638 if( a[incx*(i+2 )]!=b[incy*(i+2 )]) goto differing;
3639 if( a[incx*(i+3 )]!=b[incy*(i+3 )]) goto differing;
3640 if( a[incx*(i+4 )]!=b[incy*(i+4 )]) goto differing;
3641 if( a[incx*(i+5 )]!=b[incy*(i+5 )]) goto differing;
3642 if( a[incx*(i+6 )]!=b[incy*(i+6 )]) goto differing;
3643 if( a[incx*(i+7 )]!=b[incy*(i+7 )]) goto differing;
3644 if( a[incx*(i+8 )]!=b[incy*(i+8 )]) goto differing;
3645 if( a[incx*(i+9 )]!=b[incy*(i+9 )]) goto differing;
3646 if( a[incx*(i+10 )]!=b[incy*(i+10 )]) goto differing;
3647 if( a[incx*(i+11 )]!=b[incy*(i+11 )]) goto differing;
3648 if( a[incx*(i+12 )]!=b[incy*(i+12 )]) goto differing;
3649 if( a[incx*(i+13 )]!=b[incy*(i+13 )]) goto differing;
3650 if( a[incx*(i+14 )]!=b[incy*(i+14 )]) goto differing;
3651 if( a[incx*(i+15 )]!=b[incy*(i+15 )]) goto differing;
3652 }
3653 for( ;i<n;++i){ if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3654 }
3655 }
3656 return RSB_ERR_NO_ERROR;
3657 }
3658 else
3659 #endif
3660 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3661 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
3662 {
3663 rsb_nnz_idx_t i;
3664 const float *a = ap; const float *b = bp;
3665
3666 {
3667 for(i=0;i+15<n;i+=16){
3668 if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3669 if( a[incx*(i+1 )]!=b[incy*(i+1 )]) goto differing;
3670 if( a[incx*(i+2 )]!=b[incy*(i+2 )]) goto differing;
3671 if( a[incx*(i+3 )]!=b[incy*(i+3 )]) goto differing;
3672 if( a[incx*(i+4 )]!=b[incy*(i+4 )]) goto differing;
3673 if( a[incx*(i+5 )]!=b[incy*(i+5 )]) goto differing;
3674 if( a[incx*(i+6 )]!=b[incy*(i+6 )]) goto differing;
3675 if( a[incx*(i+7 )]!=b[incy*(i+7 )]) goto differing;
3676 if( a[incx*(i+8 )]!=b[incy*(i+8 )]) goto differing;
3677 if( a[incx*(i+9 )]!=b[incy*(i+9 )]) goto differing;
3678 if( a[incx*(i+10 )]!=b[incy*(i+10 )]) goto differing;
3679 if( a[incx*(i+11 )]!=b[incy*(i+11 )]) goto differing;
3680 if( a[incx*(i+12 )]!=b[incy*(i+12 )]) goto differing;
3681 if( a[incx*(i+13 )]!=b[incy*(i+13 )]) goto differing;
3682 if( a[incx*(i+14 )]!=b[incy*(i+14 )]) goto differing;
3683 if( a[incx*(i+15 )]!=b[incy*(i+15 )]) goto differing;
3684 }
3685 for( ;i<n;++i){ if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3686 }
3687 }
3688 return RSB_ERR_NO_ERROR;
3689 }
3690 else
3691 #endif
3692 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3693 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
3694 {
3695 rsb_nnz_idx_t i;
3696 const float complex *a = ap; const float complex *b = bp;
3697
3698 {
3699 for(i=0;i+15<n;i+=16){
3700 if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3701 if( a[incx*(i+1 )]!=b[incy*(i+1 )]) goto differing;
3702 if( a[incx*(i+2 )]!=b[incy*(i+2 )]) goto differing;
3703 if( a[incx*(i+3 )]!=b[incy*(i+3 )]) goto differing;
3704 if( a[incx*(i+4 )]!=b[incy*(i+4 )]) goto differing;
3705 if( a[incx*(i+5 )]!=b[incy*(i+5 )]) goto differing;
3706 if( a[incx*(i+6 )]!=b[incy*(i+6 )]) goto differing;
3707 if( a[incx*(i+7 )]!=b[incy*(i+7 )]) goto differing;
3708 if( a[incx*(i+8 )]!=b[incy*(i+8 )]) goto differing;
3709 if( a[incx*(i+9 )]!=b[incy*(i+9 )]) goto differing;
3710 if( a[incx*(i+10 )]!=b[incy*(i+10 )]) goto differing;
3711 if( a[incx*(i+11 )]!=b[incy*(i+11 )]) goto differing;
3712 if( a[incx*(i+12 )]!=b[incy*(i+12 )]) goto differing;
3713 if( a[incx*(i+13 )]!=b[incy*(i+13 )]) goto differing;
3714 if( a[incx*(i+14 )]!=b[incy*(i+14 )]) goto differing;
3715 if( a[incx*(i+15 )]!=b[incy*(i+15 )]) goto differing;
3716 }
3717 for( ;i<n;++i){ if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3718 }
3719 }
3720 return RSB_ERR_NO_ERROR;
3721 }
3722 else
3723 #endif
3724 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3725 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
3726 {
3727 rsb_nnz_idx_t i;
3728 const double complex *a = ap; const double complex *b = bp;
3729
3730 {
3731 for(i=0;i+15<n;i+=16){
3732 if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3733 if( a[incx*(i+1 )]!=b[incy*(i+1 )]) goto differing;
3734 if( a[incx*(i+2 )]!=b[incy*(i+2 )]) goto differing;
3735 if( a[incx*(i+3 )]!=b[incy*(i+3 )]) goto differing;
3736 if( a[incx*(i+4 )]!=b[incy*(i+4 )]) goto differing;
3737 if( a[incx*(i+5 )]!=b[incy*(i+5 )]) goto differing;
3738 if( a[incx*(i+6 )]!=b[incy*(i+6 )]) goto differing;
3739 if( a[incx*(i+7 )]!=b[incy*(i+7 )]) goto differing;
3740 if( a[incx*(i+8 )]!=b[incy*(i+8 )]) goto differing;
3741 if( a[incx*(i+9 )]!=b[incy*(i+9 )]) goto differing;
3742 if( a[incx*(i+10 )]!=b[incy*(i+10 )]) goto differing;
3743 if( a[incx*(i+11 )]!=b[incy*(i+11 )]) goto differing;
3744 if( a[incx*(i+12 )]!=b[incy*(i+12 )]) goto differing;
3745 if( a[incx*(i+13 )]!=b[incy*(i+13 )]) goto differing;
3746 if( a[incx*(i+14 )]!=b[incy*(i+14 )]) goto differing;
3747 if( a[incx*(i+15 )]!=b[incy*(i+15 )]) goto differing;
3748 }
3749 for( ;i<n;++i){ if( a[incx*(i+0 )]!=b[incy*(i+0 )]) goto differing;
3750 }
3751 }
3752 return RSB_ERR_NO_ERROR;
3753 }
3754 else
3755 #endif
3756 return RSB_ERR_UNSUPPORTED_TYPE;
3757 differing:
3758 return RSB_ERR_GENERIC_ERROR;
3759 }
3760
rsb__xcopy_strided_typed(void * a,const void * b,rsb_nnz_idx_t toi,rsb_nnz_idx_t foi,rsb_nnz_idx_t n,rsb_type_t typecode,rsb_nnz_idx_t incx,rsb_nnz_idx_t incy)3761 static rsb_err_t rsb__xcopy_strided_typed(void * a, const void * b, rsb_nnz_idx_t toi, rsb_nnz_idx_t foi, rsb_nnz_idx_t n,rsb_type_t typecode, rsb_nnz_idx_t incx, rsb_nnz_idx_t incy)
3762 {
3763 /*!
3764 * a[toi:toi+n] <- b[foi:foi+n]
3765 *
3766 * \param array an array pointer
3767 * \param type a valid type code
3768 *
3769 * \return \rsberrcodemsg
3770 * */
3771 if(incx==1 && incy==1)
3772 return rsb__xcopy(a,b,toi,foi,n,RSB_SIZEOF(typecode));
3773 /* else */
3774 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3775 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
3776 {
3777 rsb_nnz_idx_t i;
3778 double *ap = a; const double *bp = b;
3779 ap+=toi;
3780 bp+=foi;
3781 {
3782 for(i=0;i+15<n;i+=16){
3783 ap[(i+0 )*incx] = bp[(i+0 )*incy];
3784 ap[(i+1 )*incx] = bp[(i+1 )*incy];
3785 ap[(i+2 )*incx] = bp[(i+2 )*incy];
3786 ap[(i+3 )*incx] = bp[(i+3 )*incy];
3787 ap[(i+4 )*incx] = bp[(i+4 )*incy];
3788 ap[(i+5 )*incx] = bp[(i+5 )*incy];
3789 ap[(i+6 )*incx] = bp[(i+6 )*incy];
3790 ap[(i+7 )*incx] = bp[(i+7 )*incy];
3791 ap[(i+8 )*incx] = bp[(i+8 )*incy];
3792 ap[(i+9 )*incx] = bp[(i+9 )*incy];
3793 ap[(i+10 )*incx] = bp[(i+10 )*incy];
3794 ap[(i+11 )*incx] = bp[(i+11 )*incy];
3795 ap[(i+12 )*incx] = bp[(i+12 )*incy];
3796 ap[(i+13 )*incx] = bp[(i+13 )*incy];
3797 ap[(i+14 )*incx] = bp[(i+14 )*incy];
3798 ap[(i+15 )*incx] = bp[(i+15 )*incy];
3799 }
3800 for( ;i<n;++i){ ap[(i+0 )*incx] = bp[(i+0 )*incy];
3801 }
3802 }
3803 ;
3804 return RSB_ERR_NO_ERROR;
3805 }
3806 else
3807 #endif
3808 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3809 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
3810 {
3811 rsb_nnz_idx_t i;
3812 float *ap = a; const float *bp = b;
3813 ap+=toi;
3814 bp+=foi;
3815 {
3816 for(i=0;i+15<n;i+=16){
3817 ap[(i+0 )*incx] = bp[(i+0 )*incy];
3818 ap[(i+1 )*incx] = bp[(i+1 )*incy];
3819 ap[(i+2 )*incx] = bp[(i+2 )*incy];
3820 ap[(i+3 )*incx] = bp[(i+3 )*incy];
3821 ap[(i+4 )*incx] = bp[(i+4 )*incy];
3822 ap[(i+5 )*incx] = bp[(i+5 )*incy];
3823 ap[(i+6 )*incx] = bp[(i+6 )*incy];
3824 ap[(i+7 )*incx] = bp[(i+7 )*incy];
3825 ap[(i+8 )*incx] = bp[(i+8 )*incy];
3826 ap[(i+9 )*incx] = bp[(i+9 )*incy];
3827 ap[(i+10 )*incx] = bp[(i+10 )*incy];
3828 ap[(i+11 )*incx] = bp[(i+11 )*incy];
3829 ap[(i+12 )*incx] = bp[(i+12 )*incy];
3830 ap[(i+13 )*incx] = bp[(i+13 )*incy];
3831 ap[(i+14 )*incx] = bp[(i+14 )*incy];
3832 ap[(i+15 )*incx] = bp[(i+15 )*incy];
3833 }
3834 for( ;i<n;++i){ ap[(i+0 )*incx] = bp[(i+0 )*incy];
3835 }
3836 }
3837 ;
3838 return RSB_ERR_NO_ERROR;
3839 }
3840 else
3841 #endif
3842 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3843 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
3844 {
3845 rsb_nnz_idx_t i;
3846 float complex *ap = a; const float complex *bp = b;
3847 ap+=toi;
3848 bp+=foi;
3849 {
3850 for(i=0;i+15<n;i+=16){
3851 ap[(i+0 )*incx] = bp[(i+0 )*incy];
3852 ap[(i+1 )*incx] = bp[(i+1 )*incy];
3853 ap[(i+2 )*incx] = bp[(i+2 )*incy];
3854 ap[(i+3 )*incx] = bp[(i+3 )*incy];
3855 ap[(i+4 )*incx] = bp[(i+4 )*incy];
3856 ap[(i+5 )*incx] = bp[(i+5 )*incy];
3857 ap[(i+6 )*incx] = bp[(i+6 )*incy];
3858 ap[(i+7 )*incx] = bp[(i+7 )*incy];
3859 ap[(i+8 )*incx] = bp[(i+8 )*incy];
3860 ap[(i+9 )*incx] = bp[(i+9 )*incy];
3861 ap[(i+10 )*incx] = bp[(i+10 )*incy];
3862 ap[(i+11 )*incx] = bp[(i+11 )*incy];
3863 ap[(i+12 )*incx] = bp[(i+12 )*incy];
3864 ap[(i+13 )*incx] = bp[(i+13 )*incy];
3865 ap[(i+14 )*incx] = bp[(i+14 )*incy];
3866 ap[(i+15 )*incx] = bp[(i+15 )*incy];
3867 }
3868 for( ;i<n;++i){ ap[(i+0 )*incx] = bp[(i+0 )*incy];
3869 }
3870 }
3871 ;
3872 return RSB_ERR_NO_ERROR;
3873 }
3874 else
3875 #endif
3876 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
3877 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
3878 {
3879 rsb_nnz_idx_t i;
3880 double complex *ap = a; const double complex *bp = b;
3881 ap+=toi;
3882 bp+=foi;
3883 {
3884 for(i=0;i+15<n;i+=16){
3885 ap[(i+0 )*incx] = bp[(i+0 )*incy];
3886 ap[(i+1 )*incx] = bp[(i+1 )*incy];
3887 ap[(i+2 )*incx] = bp[(i+2 )*incy];
3888 ap[(i+3 )*incx] = bp[(i+3 )*incy];
3889 ap[(i+4 )*incx] = bp[(i+4 )*incy];
3890 ap[(i+5 )*incx] = bp[(i+5 )*incy];
3891 ap[(i+6 )*incx] = bp[(i+6 )*incy];
3892 ap[(i+7 )*incx] = bp[(i+7 )*incy];
3893 ap[(i+8 )*incx] = bp[(i+8 )*incy];
3894 ap[(i+9 )*incx] = bp[(i+9 )*incy];
3895 ap[(i+10 )*incx] = bp[(i+10 )*incy];
3896 ap[(i+11 )*incx] = bp[(i+11 )*incy];
3897 ap[(i+12 )*incx] = bp[(i+12 )*incy];
3898 ap[(i+13 )*incx] = bp[(i+13 )*incy];
3899 ap[(i+14 )*incx] = bp[(i+14 )*incy];
3900 ap[(i+15 )*incx] = bp[(i+15 )*incy];
3901 }
3902 for( ;i<n;++i){ ap[(i+0 )*incx] = bp[(i+0 )*incy];
3903 }
3904 }
3905 ;
3906 return RSB_ERR_NO_ERROR;
3907 }
3908 else
3909 #endif
3910 return RSB_ERR_NO_ERROR;
3911 }
3912
rsb__sqrt_of_sum_of_fabs_diffs(const void * a,const void * b,void * err,rsb_type_t type,size_t n)3913 rsb_err_t rsb__sqrt_of_sum_of_fabs_diffs(const void * a, const void * b, void *err, rsb_type_t type, size_t n)
3914 {
3915 /*!
3916 * Will compute the square root of the sum of the squares of the vectors elements differences.
3917 * \param array an array pointer
3918 * \param type a valid type code
3919 * \param n the input array length
3920 *
3921 * FIXME
3922 *
3923 * \return \rsberrcodemsg
3924 * */
3925 size_t i;
3926 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
3927 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
3928 {
3929 const double*ta = a; const double*tb = b;
3930 *((double*)err) = ((double)(0));
3931 {
3932 for(i=0;i+15<n;i+=16){
3933 *((double*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3934 *((double*)(err))+=(ta[i+1 ]-tb[i+1 ])*(ta[i+1 ]-tb[i+1 ]);
3935 *((double*)(err))+=(ta[i+2 ]-tb[i+2 ])*(ta[i+2 ]-tb[i+2 ]);
3936 *((double*)(err))+=(ta[i+3 ]-tb[i+3 ])*(ta[i+3 ]-tb[i+3 ]);
3937 *((double*)(err))+=(ta[i+4 ]-tb[i+4 ])*(ta[i+4 ]-tb[i+4 ]);
3938 *((double*)(err))+=(ta[i+5 ]-tb[i+5 ])*(ta[i+5 ]-tb[i+5 ]);
3939 *((double*)(err))+=(ta[i+6 ]-tb[i+6 ])*(ta[i+6 ]-tb[i+6 ]);
3940 *((double*)(err))+=(ta[i+7 ]-tb[i+7 ])*(ta[i+7 ]-tb[i+7 ]);
3941 *((double*)(err))+=(ta[i+8 ]-tb[i+8 ])*(ta[i+8 ]-tb[i+8 ]);
3942 *((double*)(err))+=(ta[i+9 ]-tb[i+9 ])*(ta[i+9 ]-tb[i+9 ]);
3943 *((double*)(err))+=(ta[i+10 ]-tb[i+10 ])*(ta[i+10 ]-tb[i+10 ]);
3944 *((double*)(err))+=(ta[i+11 ]-tb[i+11 ])*(ta[i+11 ]-tb[i+11 ]);
3945 *((double*)(err))+=(ta[i+12 ]-tb[i+12 ])*(ta[i+12 ]-tb[i+12 ]);
3946 *((double*)(err))+=(ta[i+13 ]-tb[i+13 ])*(ta[i+13 ]-tb[i+13 ]);
3947 *((double*)(err))+=(ta[i+14 ]-tb[i+14 ])*(ta[i+14 ]-tb[i+14 ]);
3948 *((double*)(err))+=(ta[i+15 ]-tb[i+15 ])*(ta[i+15 ]-tb[i+15 ]);
3949 }
3950 for( ;i<n;++i){ *((double*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3951 }
3952 }
3953 ;
3954 *((double*)err) = sqrt((*((double*)err)));
3955 }
3956 else
3957 #endif
3958 #ifdef RSB_NUMERICAL_TYPE_FLOAT
3959 if( type == RSB_NUMERICAL_TYPE_FLOAT )
3960 {
3961 const float*ta = a; const float*tb = b;
3962 *((float*)err) = ((float)(0));
3963 {
3964 for(i=0;i+15<n;i+=16){
3965 *((float*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3966 *((float*)(err))+=(ta[i+1 ]-tb[i+1 ])*(ta[i+1 ]-tb[i+1 ]);
3967 *((float*)(err))+=(ta[i+2 ]-tb[i+2 ])*(ta[i+2 ]-tb[i+2 ]);
3968 *((float*)(err))+=(ta[i+3 ]-tb[i+3 ])*(ta[i+3 ]-tb[i+3 ]);
3969 *((float*)(err))+=(ta[i+4 ]-tb[i+4 ])*(ta[i+4 ]-tb[i+4 ]);
3970 *((float*)(err))+=(ta[i+5 ]-tb[i+5 ])*(ta[i+5 ]-tb[i+5 ]);
3971 *((float*)(err))+=(ta[i+6 ]-tb[i+6 ])*(ta[i+6 ]-tb[i+6 ]);
3972 *((float*)(err))+=(ta[i+7 ]-tb[i+7 ])*(ta[i+7 ]-tb[i+7 ]);
3973 *((float*)(err))+=(ta[i+8 ]-tb[i+8 ])*(ta[i+8 ]-tb[i+8 ]);
3974 *((float*)(err))+=(ta[i+9 ]-tb[i+9 ])*(ta[i+9 ]-tb[i+9 ]);
3975 *((float*)(err))+=(ta[i+10 ]-tb[i+10 ])*(ta[i+10 ]-tb[i+10 ]);
3976 *((float*)(err))+=(ta[i+11 ]-tb[i+11 ])*(ta[i+11 ]-tb[i+11 ]);
3977 *((float*)(err))+=(ta[i+12 ]-tb[i+12 ])*(ta[i+12 ]-tb[i+12 ]);
3978 *((float*)(err))+=(ta[i+13 ]-tb[i+13 ])*(ta[i+13 ]-tb[i+13 ]);
3979 *((float*)(err))+=(ta[i+14 ]-tb[i+14 ])*(ta[i+14 ]-tb[i+14 ]);
3980 *((float*)(err))+=(ta[i+15 ]-tb[i+15 ])*(ta[i+15 ]-tb[i+15 ]);
3981 }
3982 for( ;i<n;++i){ *((float*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3983 }
3984 }
3985 ;
3986 *((float*)err) = sqrtf((*((float*)err)));
3987 }
3988 else
3989 #endif
3990 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
3991 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
3992 {
3993 const float complex*ta = a; const float complex*tb = b;
3994 *((float complex*)err) = ((float complex)(0));
3995 {
3996 for(i=0;i+15<n;i+=16){
3997 *((float complex*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
3998 *((float complex*)(err))+=(ta[i+1 ]-tb[i+1 ])*(ta[i+1 ]-tb[i+1 ]);
3999 *((float complex*)(err))+=(ta[i+2 ]-tb[i+2 ])*(ta[i+2 ]-tb[i+2 ]);
4000 *((float complex*)(err))+=(ta[i+3 ]-tb[i+3 ])*(ta[i+3 ]-tb[i+3 ]);
4001 *((float complex*)(err))+=(ta[i+4 ]-tb[i+4 ])*(ta[i+4 ]-tb[i+4 ]);
4002 *((float complex*)(err))+=(ta[i+5 ]-tb[i+5 ])*(ta[i+5 ]-tb[i+5 ]);
4003 *((float complex*)(err))+=(ta[i+6 ]-tb[i+6 ])*(ta[i+6 ]-tb[i+6 ]);
4004 *((float complex*)(err))+=(ta[i+7 ]-tb[i+7 ])*(ta[i+7 ]-tb[i+7 ]);
4005 *((float complex*)(err))+=(ta[i+8 ]-tb[i+8 ])*(ta[i+8 ]-tb[i+8 ]);
4006 *((float complex*)(err))+=(ta[i+9 ]-tb[i+9 ])*(ta[i+9 ]-tb[i+9 ]);
4007 *((float complex*)(err))+=(ta[i+10 ]-tb[i+10 ])*(ta[i+10 ]-tb[i+10 ]);
4008 *((float complex*)(err))+=(ta[i+11 ]-tb[i+11 ])*(ta[i+11 ]-tb[i+11 ]);
4009 *((float complex*)(err))+=(ta[i+12 ]-tb[i+12 ])*(ta[i+12 ]-tb[i+12 ]);
4010 *((float complex*)(err))+=(ta[i+13 ]-tb[i+13 ])*(ta[i+13 ]-tb[i+13 ]);
4011 *((float complex*)(err))+=(ta[i+14 ]-tb[i+14 ])*(ta[i+14 ]-tb[i+14 ]);
4012 *((float complex*)(err))+=(ta[i+15 ]-tb[i+15 ])*(ta[i+15 ]-tb[i+15 ]);
4013 }
4014 for( ;i<n;++i){ *((float complex*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
4015 }
4016 }
4017 ;
4018 *((float complex*)err) = csqrtf((*((float complex*)err)));
4019 }
4020 else
4021 #endif
4022 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4023 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4024 {
4025 const double complex*ta = a; const double complex*tb = b;
4026 *((double complex*)err) = ((double complex)(0));
4027 {
4028 for(i=0;i+15<n;i+=16){
4029 *((double complex*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
4030 *((double complex*)(err))+=(ta[i+1 ]-tb[i+1 ])*(ta[i+1 ]-tb[i+1 ]);
4031 *((double complex*)(err))+=(ta[i+2 ]-tb[i+2 ])*(ta[i+2 ]-tb[i+2 ]);
4032 *((double complex*)(err))+=(ta[i+3 ]-tb[i+3 ])*(ta[i+3 ]-tb[i+3 ]);
4033 *((double complex*)(err))+=(ta[i+4 ]-tb[i+4 ])*(ta[i+4 ]-tb[i+4 ]);
4034 *((double complex*)(err))+=(ta[i+5 ]-tb[i+5 ])*(ta[i+5 ]-tb[i+5 ]);
4035 *((double complex*)(err))+=(ta[i+6 ]-tb[i+6 ])*(ta[i+6 ]-tb[i+6 ]);
4036 *((double complex*)(err))+=(ta[i+7 ]-tb[i+7 ])*(ta[i+7 ]-tb[i+7 ]);
4037 *((double complex*)(err))+=(ta[i+8 ]-tb[i+8 ])*(ta[i+8 ]-tb[i+8 ]);
4038 *((double complex*)(err))+=(ta[i+9 ]-tb[i+9 ])*(ta[i+9 ]-tb[i+9 ]);
4039 *((double complex*)(err))+=(ta[i+10 ]-tb[i+10 ])*(ta[i+10 ]-tb[i+10 ]);
4040 *((double complex*)(err))+=(ta[i+11 ]-tb[i+11 ])*(ta[i+11 ]-tb[i+11 ]);
4041 *((double complex*)(err))+=(ta[i+12 ]-tb[i+12 ])*(ta[i+12 ]-tb[i+12 ]);
4042 *((double complex*)(err))+=(ta[i+13 ]-tb[i+13 ])*(ta[i+13 ]-tb[i+13 ]);
4043 *((double complex*)(err))+=(ta[i+14 ]-tb[i+14 ])*(ta[i+14 ]-tb[i+14 ]);
4044 *((double complex*)(err))+=(ta[i+15 ]-tb[i+15 ])*(ta[i+15 ]-tb[i+15 ]);
4045 }
4046 for( ;i<n;++i){ *((double complex*)(err))+=(ta[i+0 ]-tb[i+0 ])*(ta[i+0 ]-tb[i+0 ]);
4047 }
4048 }
4049 ;
4050 *((double complex*)err) = csqrt((*((double complex*)err)));
4051 }
4052 else
4053 #endif
4054 return RSB_ERR_UNSUPPORTED_TYPE ;
4055 return RSB_ERR_NO_ERROR;
4056 }
4057
rsb__fill_with_increasing_values(void * array,rsb_type_t type,size_t n)4058 rsb_err_t rsb__fill_with_increasing_values(void * array, rsb_type_t type, size_t n)
4059 {
4060 /*!
4061 * \ingroup gr_vec
4062 * FIXME : document me
4063 * starts with one.
4064 * \return \rsberrcodemsg
4065 * */
4066 size_t i;
4067 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4068 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4069 {
4070 double*ta = array;
4071 {
4072 for(i=0;i+15<n;i+=16){
4073 ta[i+0 ] = (const double)(i+0 +1);ta[i+1 ] = (const double)(i+1 +1);ta[i+2 ] = (const double)(i+2 +1);ta[i+3 ] = (const double)(i+3 +1);ta[i+4 ] = (const double)(i+4 +1);ta[i+5 ] = (const double)(i+5 +1);ta[i+6 ] = (const double)(i+6 +1);ta[i+7 ] = (const double)(i+7 +1);ta[i+8 ] = (const double)(i+8 +1);ta[i+9 ] = (const double)(i+9 +1);ta[i+10 ] = (const double)(i+10 +1);ta[i+11 ] = (const double)(i+11 +1);ta[i+12 ] = (const double)(i+12 +1);ta[i+13 ] = (const double)(i+13 +1);ta[i+14 ] = (const double)(i+14 +1);ta[i+15 ] = (const double)(i+15 +1);}
4074 for( ;i<n;++i){ ta[i+0 ] = (const double)(i+0 +1); }
4075 }
4076
4077 }
4078 else
4079 #endif
4080 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4081 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4082 {
4083 float*ta = array;
4084 {
4085 for(i=0;i+15<n;i+=16){
4086 ta[i+0 ] = (const float)(i+0 +1);ta[i+1 ] = (const float)(i+1 +1);ta[i+2 ] = (const float)(i+2 +1);ta[i+3 ] = (const float)(i+3 +1);ta[i+4 ] = (const float)(i+4 +1);ta[i+5 ] = (const float)(i+5 +1);ta[i+6 ] = (const float)(i+6 +1);ta[i+7 ] = (const float)(i+7 +1);ta[i+8 ] = (const float)(i+8 +1);ta[i+9 ] = (const float)(i+9 +1);ta[i+10 ] = (const float)(i+10 +1);ta[i+11 ] = (const float)(i+11 +1);ta[i+12 ] = (const float)(i+12 +1);ta[i+13 ] = (const float)(i+13 +1);ta[i+14 ] = (const float)(i+14 +1);ta[i+15 ] = (const float)(i+15 +1);}
4087 for( ;i<n;++i){ ta[i+0 ] = (const float)(i+0 +1); }
4088 }
4089
4090 }
4091 else
4092 #endif
4093 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4094 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4095 {
4096 float complex*ta = array;
4097 {
4098 for(i=0;i+15<n;i+=16){
4099 ta[i+0 ] = (const float complex)(i+0 +1);ta[i+1 ] = (const float complex)(i+1 +1);ta[i+2 ] = (const float complex)(i+2 +1);ta[i+3 ] = (const float complex)(i+3 +1);ta[i+4 ] = (const float complex)(i+4 +1);ta[i+5 ] = (const float complex)(i+5 +1);ta[i+6 ] = (const float complex)(i+6 +1);ta[i+7 ] = (const float complex)(i+7 +1);ta[i+8 ] = (const float complex)(i+8 +1);ta[i+9 ] = (const float complex)(i+9 +1);ta[i+10 ] = (const float complex)(i+10 +1);ta[i+11 ] = (const float complex)(i+11 +1);ta[i+12 ] = (const float complex)(i+12 +1);ta[i+13 ] = (const float complex)(i+13 +1);ta[i+14 ] = (const float complex)(i+14 +1);ta[i+15 ] = (const float complex)(i+15 +1);}
4100 for( ;i<n;++i){ ta[i+0 ] = (const float complex)(i+0 +1); }
4101 }
4102
4103 }
4104 else
4105 #endif
4106 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4107 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4108 {
4109 double complex*ta = array;
4110 {
4111 for(i=0;i+15<n;i+=16){
4112 ta[i+0 ] = (const double complex)(i+0 +1);ta[i+1 ] = (const double complex)(i+1 +1);ta[i+2 ] = (const double complex)(i+2 +1);ta[i+3 ] = (const double complex)(i+3 +1);ta[i+4 ] = (const double complex)(i+4 +1);ta[i+5 ] = (const double complex)(i+5 +1);ta[i+6 ] = (const double complex)(i+6 +1);ta[i+7 ] = (const double complex)(i+7 +1);ta[i+8 ] = (const double complex)(i+8 +1);ta[i+9 ] = (const double complex)(i+9 +1);ta[i+10 ] = (const double complex)(i+10 +1);ta[i+11 ] = (const double complex)(i+11 +1);ta[i+12 ] = (const double complex)(i+12 +1);ta[i+13 ] = (const double complex)(i+13 +1);ta[i+14 ] = (const double complex)(i+14 +1);ta[i+15 ] = (const double complex)(i+15 +1);}
4113 for( ;i<n;++i){ ta[i+0 ] = (const double complex)(i+0 +1); }
4114 }
4115
4116 }
4117 else
4118 #endif
4119 return RSB_ERR_UNSUPPORTED_TYPE ;
4120 return RSB_ERR_NO_ERROR;
4121 }
4122
rsb__util_do_conjugate(void * array,rsb_type_t type,size_t n)4123 rsb_err_t rsb__util_do_conjugate(void * array, rsb_type_t type, size_t n)
4124 {
4125 /*!
4126 * \ingroup gr_vec
4127 * \return \rsberrcodemsg
4128 * */
4129 size_t i;
4130 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4131 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4132 return RSB_ERR_NO_ERROR;
4133 else
4134 #endif
4135 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4136 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4137 return RSB_ERR_NO_ERROR;
4138 else
4139 #endif
4140 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4141 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4142 {
4143 float complex*ta = array;
4144 {
4145 for(i=0;i+15<n;i+=16){
4146 ta[i+0 ] = conjf(ta[i+0 ]);ta[i+1 ] = conjf(ta[i+1 ]);ta[i+2 ] = conjf(ta[i+2 ]);ta[i+3 ] = conjf(ta[i+3 ]);ta[i+4 ] = conjf(ta[i+4 ]);ta[i+5 ] = conjf(ta[i+5 ]);ta[i+6 ] = conjf(ta[i+6 ]);ta[i+7 ] = conjf(ta[i+7 ]);ta[i+8 ] = conjf(ta[i+8 ]);ta[i+9 ] = conjf(ta[i+9 ]);ta[i+10 ] = conjf(ta[i+10 ]);ta[i+11 ] = conjf(ta[i+11 ]);ta[i+12 ] = conjf(ta[i+12 ]);ta[i+13 ] = conjf(ta[i+13 ]);ta[i+14 ] = conjf(ta[i+14 ]);ta[i+15 ] = conjf(ta[i+15 ]);}
4147 for( ;i<n;++i){ ta[i+0 ] = conjf(ta[i+0 ]); }
4148 }
4149
4150 }
4151 else
4152 #endif
4153 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4154 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4155 {
4156 double complex*ta = array;
4157 {
4158 for(i=0;i+15<n;i+=16){
4159 ta[i+0 ] = conj(ta[i+0 ]);ta[i+1 ] = conj(ta[i+1 ]);ta[i+2 ] = conj(ta[i+2 ]);ta[i+3 ] = conj(ta[i+3 ]);ta[i+4 ] = conj(ta[i+4 ]);ta[i+5 ] = conj(ta[i+5 ]);ta[i+6 ] = conj(ta[i+6 ]);ta[i+7 ] = conj(ta[i+7 ]);ta[i+8 ] = conj(ta[i+8 ]);ta[i+9 ] = conj(ta[i+9 ]);ta[i+10 ] = conj(ta[i+10 ]);ta[i+11 ] = conj(ta[i+11 ]);ta[i+12 ] = conj(ta[i+12 ]);ta[i+13 ] = conj(ta[i+13 ]);ta[i+14 ] = conj(ta[i+14 ]);ta[i+15 ] = conj(ta[i+15 ]);}
4160 for( ;i<n;++i){ ta[i+0 ] = conj(ta[i+0 ]); }
4161 }
4162
4163 }
4164 else
4165 #endif
4166 return RSB_ERR_UNSUPPORTED_TYPE ;
4167 return RSB_ERR_NO_ERROR;
4168 }
4169
rsb__util_do_negate(void * array,rsb_type_t type,size_t n)4170 rsb_err_t rsb__util_do_negate(void * array, rsb_type_t type, size_t n)
4171 {
4172 /*!
4173 * \ingroup gr_vec
4174 * Will negate the input n elements long array of type type.
4175 * \param array an array pointer
4176 * \param type a valid type code
4177 * \param n the input array length
4178 *
4179 * \return \rsberrcodemsg
4180 * */
4181 size_t i;
4182 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4183 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4184 {
4185 double*ta = array;
4186 {
4187 for(i=0;i+15<n;i+=16){
4188 ta[i+0 ] = -ta[i+0 ];ta[i+1 ] = -ta[i+1 ];ta[i+2 ] = -ta[i+2 ];ta[i+3 ] = -ta[i+3 ];ta[i+4 ] = -ta[i+4 ];ta[i+5 ] = -ta[i+5 ];ta[i+6 ] = -ta[i+6 ];ta[i+7 ] = -ta[i+7 ];ta[i+8 ] = -ta[i+8 ];ta[i+9 ] = -ta[i+9 ];ta[i+10 ] = -ta[i+10 ];ta[i+11 ] = -ta[i+11 ];ta[i+12 ] = -ta[i+12 ];ta[i+13 ] = -ta[i+13 ];ta[i+14 ] = -ta[i+14 ];ta[i+15 ] = -ta[i+15 ];}
4189 for( ;i<n;++i){ ta[i+0 ] = -ta[i+0 ]; }
4190 }
4191 }
4192 else
4193 #endif
4194 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4195 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4196 {
4197 float*ta = array;
4198 {
4199 for(i=0;i+15<n;i+=16){
4200 ta[i+0 ] = -ta[i+0 ];ta[i+1 ] = -ta[i+1 ];ta[i+2 ] = -ta[i+2 ];ta[i+3 ] = -ta[i+3 ];ta[i+4 ] = -ta[i+4 ];ta[i+5 ] = -ta[i+5 ];ta[i+6 ] = -ta[i+6 ];ta[i+7 ] = -ta[i+7 ];ta[i+8 ] = -ta[i+8 ];ta[i+9 ] = -ta[i+9 ];ta[i+10 ] = -ta[i+10 ];ta[i+11 ] = -ta[i+11 ];ta[i+12 ] = -ta[i+12 ];ta[i+13 ] = -ta[i+13 ];ta[i+14 ] = -ta[i+14 ];ta[i+15 ] = -ta[i+15 ];}
4201 for( ;i<n;++i){ ta[i+0 ] = -ta[i+0 ]; }
4202 }
4203 }
4204 else
4205 #endif
4206 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4207 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4208 {
4209 float complex*ta = array;
4210 {
4211 for(i=0;i+15<n;i+=16){
4212 ta[i+0 ] = -ta[i+0 ];ta[i+1 ] = -ta[i+1 ];ta[i+2 ] = -ta[i+2 ];ta[i+3 ] = -ta[i+3 ];ta[i+4 ] = -ta[i+4 ];ta[i+5 ] = -ta[i+5 ];ta[i+6 ] = -ta[i+6 ];ta[i+7 ] = -ta[i+7 ];ta[i+8 ] = -ta[i+8 ];ta[i+9 ] = -ta[i+9 ];ta[i+10 ] = -ta[i+10 ];ta[i+11 ] = -ta[i+11 ];ta[i+12 ] = -ta[i+12 ];ta[i+13 ] = -ta[i+13 ];ta[i+14 ] = -ta[i+14 ];ta[i+15 ] = -ta[i+15 ];}
4213 for( ;i<n;++i){ ta[i+0 ] = -ta[i+0 ]; }
4214 }
4215 }
4216 else
4217 #endif
4218 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4219 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4220 {
4221 double complex*ta = array;
4222 {
4223 for(i=0;i+15<n;i+=16){
4224 ta[i+0 ] = -ta[i+0 ];ta[i+1 ] = -ta[i+1 ];ta[i+2 ] = -ta[i+2 ];ta[i+3 ] = -ta[i+3 ];ta[i+4 ] = -ta[i+4 ];ta[i+5 ] = -ta[i+5 ];ta[i+6 ] = -ta[i+6 ];ta[i+7 ] = -ta[i+7 ];ta[i+8 ] = -ta[i+8 ];ta[i+9 ] = -ta[i+9 ];ta[i+10 ] = -ta[i+10 ];ta[i+11 ] = -ta[i+11 ];ta[i+12 ] = -ta[i+12 ];ta[i+13 ] = -ta[i+13 ];ta[i+14 ] = -ta[i+14 ];ta[i+15 ] = -ta[i+15 ];}
4225 for( ;i<n;++i){ ta[i+0 ] = -ta[i+0 ]; }
4226 }
4227 }
4228 else
4229 #endif
4230 return RSB_ERR_UNSUPPORTED_TYPE ;
4231 return RSB_ERR_NO_ERROR;
4232 }
4233
rsb__util_find_min(void * minp,const void * array,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)4234 rsb_err_t rsb__util_find_min(void * minp, const void * array, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
4235 {
4236 /*!
4237 * \ingroup gr_vec
4238 *
4239 * \return \rsberrcodemsg
4240 * */
4241 size_t i;
4242 if(n<1)return RSB_ERR_BADARGS;
4243 if(inc<1)return RSB_ERR_BADARGS;
4244 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4245 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4246 {const double * ap = array;double *mp = minp;
4247 *mp = *ap;for(i = 1;i<n;++i){if(fabs(ap[i*inc])<fabs(*mp) )*mp = ap[i*inc];
4248 }}
4249 else
4250 #endif
4251 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4252 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4253 {const float * ap = array;float *mp = minp;
4254 *mp = *ap;for(i = 1;i<n;++i){if(fabsf(ap[i*inc])<fabsf(*mp) )*mp = ap[i*inc];
4255 }}
4256 else
4257 #endif
4258 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4259 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4260 {const float complex * ap = array;float complex *mp = minp;
4261 *mp = *ap;for(i = 1;i<n;++i){if(cabsf(ap[i*inc])<cabsf(*mp) )*mp = ap[i*inc];
4262 }}
4263 else
4264 #endif
4265 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4266 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4267 {const double complex * ap = array;double complex *mp = minp;
4268 *mp = *ap;for(i = 1;i<n;++i){if(cabs(ap[i*inc])<cabs(*mp) )*mp = ap[i*inc];
4269 }}
4270 else
4271 #endif
4272 return RSB_ERR_UNSUPPORTED_TYPE ;
4273 return RSB_ERR_NO_ERROR;
4274 }
4275
rsb__util_find_max(void * maxp,const void * array,rsb_type_t type,size_t n,rsb_nnz_idx_t inc)4276 rsb_err_t rsb__util_find_max(void * maxp, const void * array, rsb_type_t type, size_t n, rsb_nnz_idx_t inc)
4277 {
4278 /*!
4279 * \ingroup gr_vec
4280 *
4281 * \return \rsberrcodemsg
4282 * */
4283 size_t i;
4284 if(n<1)return RSB_ERR_BADARGS;
4285 if(inc<1)return RSB_ERR_BADARGS;
4286 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4287 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4288 {const double * ap = array;double *mp = maxp;
4289 *mp = *ap;for(i=1;i<n;++i){if(fabs(ap[i*inc])>fabs(*mp))*mp = ap[i*inc];
4290 }}
4291 else
4292 #endif
4293 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4294 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4295 {const float * ap = array;float *mp = maxp;
4296 *mp = *ap;for(i=1;i<n;++i){if(fabsf(ap[i*inc])>fabsf(*mp))*mp = ap[i*inc];
4297 }}
4298 else
4299 #endif
4300 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4301 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4302 {const float complex * ap = array;float complex *mp = maxp;
4303 *mp = *ap;for(i=1;i<n;++i){if(cabsf(ap[i*inc])>cabsf(*mp))*mp = ap[i*inc];
4304 }}
4305 else
4306 #endif
4307 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4308 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4309 {const double complex * ap = array;double complex *mp = maxp;
4310 *mp = *ap;for(i=1;i<n;++i){if(cabs(ap[i*inc])>cabs(*mp))*mp = ap[i*inc];
4311 }}
4312 else
4313 #endif
4314 return RSB_ERR_UNSUPPORTED_TYPE ;
4315 return RSB_ERR_NO_ERROR;
4316 }
4317
rsb__util_drop_to_zero_if_above_threshold(void * array,rsb_type_t type,size_t n,const void * threshold)4318 rsb_err_t rsb__util_drop_to_zero_if_above_threshold(void * array, rsb_type_t type, size_t n, const void * threshold)
4319 {
4320 /*!
4321 * \ingroup gr_vec
4322 *
4323 * \return \rsberrcodemsg
4324 * */
4325 size_t i;
4326 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4327 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4328 {const double th = (*(const double*)(threshold)); double*ta = array;
4329 for(i = 0;i<n;++i)
4330 {if(fabs(th)<fabs(ta[i]))ta[i] = ((double)(0));}}
4331 else
4332 #endif
4333 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4334 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4335 {const float th = (*(const float*)(threshold)); float*ta = array;
4336 for(i = 0;i<n;++i)
4337 {if(fabsf(th)<fabsf(ta[i]))ta[i] = ((float)(0));}}
4338 else
4339 #endif
4340 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4341 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4342 {const float complex th = (*(const float complex*)(threshold)); float complex*ta = array;
4343 for(i = 0;i<n;++i)
4344 {if(cabsf(th)<cabsf(ta[i]))ta[i] = ((float complex)(0));}}
4345 else
4346 #endif
4347 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4348 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4349 {const double complex th = (*(const double complex*)(threshold)); double complex*ta = array;
4350 for(i = 0;i<n;++i)
4351 {if(cabs(th)<cabs(ta[i]))ta[i] = ((double complex)(0));}}
4352 else
4353 #endif
4354 return RSB_ERR_UNSUPPORTED_TYPE ;
4355 return RSB_ERR_NO_ERROR;
4356 }
4357
rsb__util_count_positive(void * array,rsb_type_t type,size_t n)4358 rsb_nnz_idx_t rsb__util_count_positive(void * array, rsb_type_t type, size_t n)
4359 {
4360 /*!
4361 * \ingroup gr_vec
4362 *
4363 * \return \rsberrcodemsg
4364 * */
4365 size_t i, c = 0;
4366 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4367 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4368 { double*ta = array;
4369 for(i=0;i<n;++i)
4370 c+=((ta[i])>(double)0);
4371 }else
4372 #endif
4373 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4374 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4375 { float*ta = array;
4376 for(i=0;i<n;++i)
4377 c+=((ta[i])>(float)0);
4378 }else
4379 #endif
4380 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4381 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4382 { float complex*ta = array;
4383 for(i=0;i<n;++i)
4384 c+=(crealf(ta[i])>(float)0);
4385 }else
4386 #endif
4387 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4388 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4389 { double complex*ta = array;
4390 for(i=0;i<n;++i)
4391 c+=(creal(ta[i])>(double)0);
4392 }else
4393 #endif
4394 return RSB_ERR_UNSUPPORTED_TYPE ;
4395 return c;
4396 }
4397
rsb__util_count_negative(void * array,rsb_type_t type,size_t n)4398 rsb_nnz_idx_t rsb__util_count_negative(void * array, rsb_type_t type, size_t n)
4399 {
4400 /*!
4401 * \ingroup gr_vec
4402 *
4403 * \return \rsberrcodemsg
4404 * */
4405 size_t i, c = 0;
4406 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4407 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4408 { double*ta = array;
4409 for(i=0;i<n;++i)
4410 c+=((ta[i])<(double)0);
4411 }else
4412 #endif
4413 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4414 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4415 { float*ta = array;
4416 for(i=0;i<n;++i)
4417 c+=((ta[i])<(float)0);
4418 }else
4419 #endif
4420 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4421 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4422 { float complex*ta = array;
4423 for(i=0;i<n;++i)
4424 c+=(crealf(ta[i])<(float)0);
4425 }else
4426 #endif
4427 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4428 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4429 { double complex*ta = array;
4430 for(i=0;i<n;++i)
4431 c+=(creal(ta[i])<(double)0);
4432 }else
4433 #endif
4434 return RSB_ERR_UNSUPPORTED_TYPE ;
4435 return c;
4436 }
4437
rsb__util_drop_to_zero_if_under_threshold(void * array,rsb_type_t type,size_t n,const void * threshold)4438 rsb_err_t rsb__util_drop_to_zero_if_under_threshold(void * array, rsb_type_t type, size_t n, const void * threshold)
4439 {
4440 /*!
4441 * \ingroup gr_vec
4442 *
4443 * \return \rsberrcodemsg
4444 * */
4445 size_t i;
4446 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4447 if( type == RSB_NUMERICAL_TYPE_DOUBLE ) {
4448 const double th = (*(double*)(threshold)); double*ta = ((double*)(array));
4449 for(i=0;i<n;++i){if(fabs(th)>fabs(ta[i]))ta[i] = ((double)(0));}}
4450 else
4451 #endif
4452 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4453 if( type == RSB_NUMERICAL_TYPE_FLOAT ) {
4454 const float th = (*(float*)(threshold)); float*ta = ((float*)(array));
4455 for(i=0;i<n;++i){if(fabsf(th)>fabsf(ta[i]))ta[i] = ((float)(0));}}
4456 else
4457 #endif
4458 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4459 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ) {
4460 const float complex th = (*(float complex*)(threshold)); float complex*ta = ((float complex*)(array));
4461 for(i=0;i<n;++i){if(cabsf(th)>cabsf(ta[i]))ta[i] = ((float complex)(0));}}
4462 else
4463 #endif
4464 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4465 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ) {
4466 const double complex th = (*(double complex*)(threshold)); double complex*ta = ((double complex*)(array));
4467 for(i=0;i<n;++i){if(cabs(th)>cabs(ta[i]))ta[i] = ((double complex)(0));}}
4468 else
4469 #endif
4470 return RSB_ERR_UNSUPPORTED_TYPE ;
4471 return RSB_ERR_NO_ERROR;
4472 }
4473
rsb__fill_with_ones(void * array,rsb_type_t type,size_t n,size_t incx)4474 rsb_err_t rsb__fill_with_ones(void * array, rsb_type_t type, size_t n, size_t incx){
4475 /*!
4476 * \ingroup gr_vec
4477 * Will set to one the input n elements long array of type type.
4478 * \param array an array pointer
4479 * \param type a valid type code
4480 * \param n the input array length
4481 *
4482 * \return \rsberrcodemsg
4483 * TODO:RENAME: rsb__fill_with_ones -> rsb__val_fill_with_ones.
4484 * */
4485 size_t i;
4486 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4487 if( type == RSB_NUMERICAL_TYPE_DOUBLE ){
4488 double*ta = ((double*)(array));
4489 for(i=0;i<n;++i) {ta[i*incx] = ((double)(1.0));}}
4490 else
4491 #endif
4492 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4493 if( type == RSB_NUMERICAL_TYPE_FLOAT ){
4494 float*ta = ((float*)(array));
4495 for(i=0;i<n;++i) {ta[i*incx] = ((float)(1.0));}}
4496 else
4497 #endif
4498 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4499 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX ){
4500 float complex*ta = ((float complex*)(array));
4501 for(i=0;i<n;++i) {ta[i*incx] = ((float complex)(1.0));}}
4502 else
4503 #endif
4504 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4505 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX ){
4506 double complex*ta = ((double complex*)(array));
4507 for(i=0;i<n;++i) {ta[i*incx] = ((double complex)(1.0));}}
4508 else
4509 #endif
4510 return RSB_ERR_UNSUPPORTED_TYPE ;
4511 return RSB_ERR_NO_ERROR;
4512 }
4513
rsb__debug_print_vectors_diff(const void * v1,const void * v2,size_t n,rsb_type_t type,size_t incx,size_t incy,int onlyfirst)4514 rsb_err_t rsb__debug_print_vectors_diff(const void * v1, const void * v2, size_t n, rsb_type_t type, size_t incx, size_t incy, int onlyfirst){
4515 /*!
4516 * A debug function for printing the difference of two vectors of a specified type, in parallel.
4517 * FIXME : It should take into account thresholds specific to each numerical type.
4518 **/
4519 #if RSB_ALLOW_STDOUT
4520 size_t i, differing = 0;
4521 if(!v1 || !v2)return RSB_ERR_BADARGS;
4522
4523 RSB_STDERR("\t vectors diff :\n");
4524
4525 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4526 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4527 {
4528 const double *v1p = v1,*v2p = v2; double th = 0.0001;
4529 for(i=0;i<n ;++i)
4530 if(fabs((double)(v1p[i*incx]-v2p[i*incy]))>th)/*FIXME : incomplete check*/
4531 { differing++;
4532 if((onlyfirst==0)||(onlyfirst>differing))
4533 RSB_STDOUT("%zd : "RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING" "RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING"\n",(rsb_printf_int_t)i, v1p[i*incx],v2p[i*incy] );
4534 }
4535 }
4536 else
4537 #endif
4538 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4539 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4540 {
4541 const float *v1p = v1,*v2p = v2; float th = 0.0001;
4542 for(i=0;i<n ;++i)
4543 if(fabs((double)(v1p[i*incx]-v2p[i*incy]))>th)/*FIXME : incomplete check*/
4544 { differing++;
4545 if((onlyfirst==0)||(onlyfirst>differing))
4546 RSB_STDOUT("%zd : "RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING" "RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING"\n",(rsb_printf_int_t)i, v1p[i*incx],v2p[i*incy] );
4547 }
4548 }
4549 else
4550 #endif
4551 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4552 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4553 {
4554 const float complex *v1p = v1,*v2p = v2; float th = 0.0001;
4555 for(i=0;i<n ;++i)
4556 if(crealf(v1p[i*incx])-crealf(v2p[i*incy])>th)/*FIXME : incomplete check*/{ differing++;
4557 if((onlyfirst==0)||(onlyfirst>differing))
4558 RSB_STDOUT("%zd : "RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING" "RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING"\n",(rsb_printf_int_t)i, crealf(v1p[i*incx]),cimagf(v1p[i*incx]),crealf(v2p[i*incy]),cimagf(v2p[i*incy]) );
4559 }
4560 }
4561 else
4562 #endif
4563 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4564 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4565 {
4566 const double complex *v1p = v1,*v2p = v2; double th = 0.0001;
4567 for(i=0;i<n ;++i)
4568 if(creal(v1p[i*incx])-creal(v2p[i*incy])>th)/*FIXME : incomplete check*/{ differing++;
4569 if((onlyfirst==0)||(onlyfirst>differing))
4570 RSB_STDOUT("%zd : "RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING" "RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING"\n",(rsb_printf_int_t)i, creal(v1p[i*incx]),cimag(v1p[i*incx]),creal(v2p[i*incy]),cimag(v2p[i*incy]) );
4571 }
4572 }
4573 else
4574 #endif
4575 return RSB_ERR_UNSUPPORTED_TYPE ;
4576 if(differing>onlyfirst)RSB_STDOUT("...(for a total of %zd differing entries)...\n",(rsb_printf_int_t)(differing-onlyfirst));
4577 return RSB_ERR_NO_ERROR;
4578 #else
4579 return RSB_ERR_UNSUPPORTED_FEATURE;
4580 #endif
4581 }
4582
rsb__debug_print_value(const void * v,rsb_type_t type)4583 rsb_err_t rsb__debug_print_value(const void * v, rsb_type_t type){
4584 /*!
4585 **/
4586 #if RSB_ALLOW_STDOUT
4587 if(!v)return RSB_ERR_BADARGS;
4588
4589 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4590 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4591 {
4592 const double *v1p = v;
4593 RSB_STDOUT(RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING, v1p[0] );
4594 }
4595 else
4596 #endif
4597 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4598 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4599 {
4600 const float *v1p = v;
4601 RSB_STDOUT(RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING, v1p[0] );
4602 }
4603 else
4604 #endif
4605 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4606 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4607 {
4608 const float complex *v1p = v;
4609 RSB_STDOUT(RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING, crealf(v1p[0]),cimagf(v1p[0]) );
4610 }
4611 else
4612 #endif
4613 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4614 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4615 {
4616 const double complex *v1p = v;
4617 RSB_STDOUT(RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING, creal(v1p[0]),cimag(v1p[0]) );
4618 }
4619 else
4620 #endif
4621 return RSB_ERR_UNSUPPORTED_TYPE ;
4622 return RSB_ERR_NO_ERROR;
4623 #else
4624 return RSB_ERR_UNSUPPORTED_FEATURE;
4625 #endif
4626 }
4627
rsb__debug_print_vector_extra(const void * v1,size_t n,rsb_type_t type,size_t inc,int style,FILE * stream)4628 rsb_err_t rsb__debug_print_vector_extra(const void * v1, size_t n, rsb_type_t type, size_t inc, int style, FILE*stream){
4629 /*!
4630 * A debug function for printing two vectors of a specified type, in parallel.
4631 **/
4632 #if RSB_ALLOW_STDOUT
4633 rsb_nnz_idx_t i;
4634 int want_header = ( style == 0x1 );
4635 const char * ts = RSB_IS_MATRIX_TYPE_COMPLEX(type)?"complex":"real";
4636 const char * ss = RSB_SYMMETRY_STRING(RSB_FLAG_NOFLAGS);
4637
4638 if( n < 0 )
4639 goto errb;
4640
4641 if(!v1 || !stream)
4642 goto errb;
4643
4644 /*if(!want_header)
4645 RSB_STDERR("\t vectors :\n");*/
4646
4647 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4648 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4649 {
4650 const double *v1p = v1;
4651 if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix array %s %s\n%zd %zd\n",ts,ss,(rsb_printf_int_t)n,(rsb_printf_int_t)1);
4652 for(i=0;i<n;++i)
4653 RSB_FPRINTF(stream,RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING "\n", v1p[i*inc] );
4654 }
4655 else
4656 #endif
4657 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4658 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4659 {
4660 const float *v1p = v1;
4661 if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix array %s %s\n%zd %zd\n",ts,ss,(rsb_printf_int_t)n,(rsb_printf_int_t)1);
4662 for(i=0;i<n;++i)
4663 RSB_FPRINTF(stream,RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING "\n", v1p[i*inc] );
4664 }
4665 else
4666 #endif
4667 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4668 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4669 {
4670 const float complex *v1p = v1;
4671 if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix array %s %s\n%zd %zd\n",ts,ss,(rsb_printf_int_t)n,(rsb_printf_int_t)1);
4672 for(i=0;i<n;++i)
4673 RSB_FPRINTF(stream,RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING "\n", crealf(v1p[i*inc]),cimagf(v1p[i*inc]) );
4674 }
4675 else
4676 #endif
4677 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4678 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4679 {
4680 const double complex *v1p = v1;
4681 if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix array %s %s\n%zd %zd\n",ts,ss,(rsb_printf_int_t)n,(rsb_printf_int_t)1);
4682 for(i=0;i<n;++i)
4683 RSB_FPRINTF(stream,RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING "\n", creal(v1p[i*inc]),cimag(v1p[i*inc]) );
4684 }
4685 else
4686 #endif
4687 return RSB_ERR_UNSUPPORTED_TYPE ;
4688 return RSB_ERR_NO_ERROR;
4689 #else
4690 return RSB_ERR_UNSUPPORTED_FEATURE;
4691 #endif
4692 errb:
4693 return RSB_ERR_BADARGS;
4694 }
4695
rsb__debug_print_vector(const void * v1,size_t n,rsb_type_t type,size_t inc)4696 rsb_err_t rsb__debug_print_vector(const void * v1, size_t n, rsb_type_t type, size_t inc){
4697 return rsb__debug_print_vector_extra(v1, n, type, inc, 0x0, stdout);
4698 }
4699
rsb__debug_print_vectors(const void * v1,const void * v2,size_t n,size_t incx,size_t incy,rsb_type_t type)4700 rsb_err_t rsb__debug_print_vectors(const void * v1, const void * v2, size_t n, size_t incx, size_t incy, rsb_type_t type){
4701 /*!
4702 * A debug function for printing two vectors of a specified type, in parallel.
4703 **/
4704 #if RSB_ALLOW_STDOUT
4705 size_t i;
4706 if(!v1 || !v2)return RSB_ERR_BADARGS;
4707
4708 RSB_STDERR("\t vectors :\n");
4709
4710 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
4711 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
4712 {
4713 const double *v1p = v1,*v2p = v2;
4714 for(i=0;i<n;++i)
4715 RSB_STDOUT(RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING" "RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING"\n",v1p[(i)*incx],v2p[(i)*incy]);
4716 }
4717 else
4718 #endif
4719 #ifdef RSB_NUMERICAL_TYPE_FLOAT
4720 if( type == RSB_NUMERICAL_TYPE_FLOAT )
4721 {
4722 const float *v1p = v1,*v2p = v2;
4723 for(i=0;i<n;++i)
4724 RSB_STDOUT(RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING" "RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING"\n",v1p[(i)*incx],v2p[(i)*incy]);
4725 }
4726 else
4727 #endif
4728 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
4729 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
4730 {
4731 const float complex *v1p = v1,*v2p = v2;
4732 for(i=0;i<n;++i)
4733 RSB_STDOUT(RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING" "RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING"\n",crealf(v1p[(i)*incx]),cimagf(v1p[(i)*incx]),crealf(v2p[(i)*incy]),cimagf(v2p[(i)*incy]));
4734 }
4735 else
4736 #endif
4737 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
4738 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
4739 {
4740 const double complex *v1p = v1,*v2p = v2;
4741 for(i=0;i<n;++i)
4742 RSB_STDOUT(RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING" "RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING"\n",creal(v1p[(i)*incx]),cimag(v1p[(i)*incx]),creal(v2p[(i)*incy]),cimag(v2p[(i)*incy]));
4743 }
4744 else
4745 #endif
4746 return RSB_ERR_UNSUPPORTED_TYPE ;
4747 return RSB_ERR_NO_ERROR;
4748 #else
4749 return RSB_ERR_UNSUPPORTED_FEATURE;
4750 #endif
4751 }
4752
4753
rsb__do_account_sorted_optimized_css(const rsb_coo_idx_t * MIndx,const rsb_coo_idx_t * mIndx,const rsb_coo_idx_t Mdim,const rsb_coo_idx_t mdim,const rsb_nnz_idx_t nnz,rsb_nnz_idx_t * elements_per_block_row,rsb_nnz_idx_t * blocks_per_block_row)4754 rsb_err_t rsb__do_account_sorted_optimized_css(
4755 const rsb_coo_idx_t * MIndx, const rsb_coo_idx_t * mIndx,
4756 const rsb_coo_idx_t Mdim, const rsb_coo_idx_t mdim,
4757 const rsb_nnz_idx_t nnz, rsb_nnz_idx_t * elements_per_block_row, rsb_nnz_idx_t * blocks_per_block_row
4758 )
4759 {
4760 /**
4761 \ingroup gr_internals
4762
4763 elements_per_block_row and blocks_per_block_row arrays should be blanked.
4764 FIXME : missing error handling.
4765 */
4766 rsb_err_t errval = RSB_ERR_NO_ERROR;
4767 rsb_nnz_idx_t n = 0;
4768
4769 if(blocks_per_block_row)
4770 for(n=0;n<nnz;++n)
4771 {
4772 RSB_DEBUG_ASSERT(MIndx[n]<Mdim);
4773 RSB_DEBUG_ASSERT(MIndx[n]>=0);
4774 elements_per_block_row[MIndx[n]]++;
4775 blocks_per_block_row [MIndx[n]]++;
4776 }
4777 else
4778 for(n=0;n<nnz;++n)
4779 {
4780 RSB_DEBUG_ASSERT(MIndx[n]<Mdim);
4781 RSB_DEBUG_ASSERT(MIndx[n]>=0);
4782 elements_per_block_row[MIndx[n]]++;
4783 }
4784 RSB_DO_ERR_RETURN(errval)
4785 }
4786
rsb__do_account_sorted_optimized(struct rsb_mtx_t * mtxAp,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_coo_idx_t Idim,const rsb_coo_idx_t Jdim,const rsb_nnz_idx_t nnz,const struct rsb_mtx_partitioning_info_t * pinfop,rsb_nnz_idx_t * elements_per_block_row,rsb_nnz_idx_t * blocks_per_block_row)4787 rsb_err_t rsb__do_account_sorted_optimized(
4788 struct rsb_mtx_t * mtxAp,
4789 const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA,
4790 const rsb_coo_idx_t Idim, const rsb_coo_idx_t Jdim,
4791 const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop,
4792 rsb_nnz_idx_t * elements_per_block_row,
4793 rsb_nnz_idx_t * blocks_per_block_row
4794 )
4795 {
4796 /**
4797 * \ingroup gr_internals
4798 * FIXME : document this
4799 */
4800 rsb_coo_idx_t blockrows = 0;
4801 rsb_coo_idx_t blockcolumns = 0;
4802 rsb_coo_idx_t baserow = 0;
4803 rsb_coo_idx_t basecolumn = 0;
4804 const rsb_coo_idx_t *Mpntr = NULL;
4805 const rsb_coo_idx_t *mpntr = NULL;
4806 const rsb_coo_idx_t *MIndx = NULL;
4807 const rsb_coo_idx_t *mIndx = NULL;
4808 rsb_blk_idx_t mI = 0, MI = 0;
4809 rsb_err_t errval = RSB_ERR_NO_ERROR;
4810 rsb_nnz_idx_t k = 0; /* will index a nnz sized array */
4811 int K = 0;
4812
4813 if(0)
4814 //if( flags & RSB_FLAG_SHOULD_DEBUG )
4815 errval = rsb__do_account_sorted( mtxAp, IA, JA, nnz, pinfop, elements_per_block_row, blocks_per_block_row);
4816
4817 if(nnz==0)
4818 {
4819 /* FIXME: new case, incomplete (useful for diagonal implicit matrices) */
4820 return RSB_ERR_NO_ERROR;
4821 }
4822
4823 #if RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
4824 if(!pinfop)
4825 {
4826 /* a performance fix */
4827 if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
4828 return rsb__do_account_sorted_optimized_css(JA,IA,Jdim,Idim,nnz,elements_per_block_row,blocks_per_block_row);
4829 else
4830 return rsb__do_account_sorted_optimized_css(IA,JA,Idim,Jdim,nnz,elements_per_block_row,blocks_per_block_row);
4831 }
4832 #endif
4833
4834 if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
4835 {
4836 mpntr = pinfop->rpntr;
4837 Mpntr = pinfop->cpntr;
4838 mIndx = IA;
4839 MIndx = JA;
4840 }
4841 else
4842 {
4843 Mpntr = pinfop->rpntr;
4844 mpntr = pinfop->cpntr;
4845 MIndx = IA;
4846 mIndx = JA;
4847 }
4848
4849 /* storage BCOR */
4850 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
4851 {
4852 k = mI = MI = 0;K = 0;
4853 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
4854 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
4855 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
4856 RSB_ASSERT( blockrows && blockcolumns);
4857 #else
4858 blockrows = Mpntr[MI+1] - Mpntr[MI];
4859 blockcolumns = mpntr[mI+1] - mpntr[mI];
4860 #endif
4861
4862
4863 k = mI = MI = K=0;
4864 while( MIndx[k] >= Mpntr[MI+1] )++MI; /* skipping preceding block rows .. */
4865 while( mIndx[k] >= mpntr[mI+1] )++mI; /* skipping preceding block columns .. */
4866 blockrows = Mpntr[MI+1] - Mpntr[MI];
4867 blockcolumns = mpntr[mI+1] - mpntr[mI];
4868 baserow = Mpntr[MI];
4869 basecolumn = mpntr[mI];
4870 *elements_per_block_row = 0;
4871 *blocks_per_block_row = 0;
4872 elements_per_block_row[MI*0] += blockrows * blockcolumns;
4873 blocks_per_block_row[MI] +=1;
4874
4875 while(RSB_LIKELY(k<nnz))
4876 {
4877 #ifdef DEBUG
4878 if( MIndx[k] < baserow )
4879 {
4880 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k,(rsb_printf_int_t) (MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
4881 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
4882 errval = RSB_ERR_INTERNAL_ERROR;
4883 goto err;
4884 }
4885 #endif
4886
4887 if( mIndx[k] >= basecolumn+blockcolumns )
4888 {
4889 /* new block column, for sure */
4890
4891 while( mIndx[k] >= mpntr[mI+1] )++mI;
4892 blockcolumns = mpntr[mI+1] - mpntr[mI];
4893 basecolumn = mpntr[mI];
4894
4895 if( MIndx[k] >= baserow+blockrows )
4896 {
4897 /* new block row */
4898
4899 while( MIndx[k] >= Mpntr[MI+1] )++MI;
4900 blockrows = Mpntr[MI+1] - Mpntr[MI];
4901 baserow = Mpntr[MI];
4902 }
4903 else
4904 {
4905 /* same block row */
4906 }
4907 *elements_per_block_row += blockrows * blockcolumns;
4908 blocks_per_block_row[MI] +=1;
4909 ++K;
4910 }
4911 else
4912 if( MIndx[k] >= baserow+blockrows )
4913 {
4914 /* new row block, for sure */
4915
4916 while( MIndx[k] >= Mpntr[MI+1] )++MI;
4917 blockrows = Mpntr[MI+1] - Mpntr[MI];
4918 baserow = Mpntr[MI];
4919
4920 if( mIndx[k] < basecolumn )
4921 {
4922 /* new row block, new block column */
4923
4924 mI = 0;
4925 while( mIndx[k] >= mpntr[mI+1] )++mI;
4926 blockcolumns = mpntr[mI+1] - mpntr[mI];
4927 basecolumn = mpntr[mI];
4928 }
4929 else
4930 {
4931 /* new row block, same column */
4932 }
4933 /* get rid of this var : elements_per_block_row */
4934 *elements_per_block_row += blockrows * blockcolumns;
4935 blocks_per_block_row[MI] +=1;
4936 ++K;
4937 }
4938 else
4939 {
4940 /* same block row for sure */
4941 }
4942 ++k;
4943 }
4944 errval = RSB_ERR_NO_ERROR;goto ret;
4945 }
4946 /* storage BCSR */
4947 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
4948 {
4949 k = mI = MI = 0;K = 0;
4950 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
4951 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
4952 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
4953 RSB_ASSERT( blockrows && blockcolumns);
4954 #else
4955 blockrows = Mpntr[MI+1] - Mpntr[MI];
4956 blockcolumns = mpntr[mI+1] - mpntr[mI];
4957 #endif
4958
4959
4960 k = mI = MI = K=0;
4961 while( MIndx[k] >= (blockrows *(MI+1)) )++MI; /* skipping preceding block rows .. */
4962 while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI; /* skipping preceding block columns .. */
4963 blockrows = (blockrows *(MI+1)) - (blockrows *(MI));
4964 blockcolumns = (blockcolumns*(mI+1)) - (blockcolumns*(mI));
4965 baserow = (blockrows *(MI));
4966 basecolumn = (blockcolumns*(mI));
4967 *elements_per_block_row = 0;
4968 *blocks_per_block_row = 0;
4969 elements_per_block_row[MI*0] += blockrows * blockcolumns;
4970 blocks_per_block_row[MI] +=1;
4971
4972 while(RSB_LIKELY(k<nnz))
4973 {
4974 #ifdef DEBUG
4975 if( MIndx[k] < baserow )
4976 {
4977 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k,(rsb_printf_int_t) (MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
4978 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
4979 errval = RSB_ERR_INTERNAL_ERROR;
4980 goto err;
4981 }
4982 #endif
4983
4984 if( mIndx[k] >= basecolumn+blockcolumns )
4985 {
4986 /* new block column, for sure */
4987 mI = mIndx[k]/blockcolumns;
4988 basecolumn = (blockcolumns*(mI));
4989
4990 if( MIndx[k] >= baserow+blockrows )
4991 {
4992 /* new block row */
4993 MI = MIndx[k]/blockrows;
4994 baserow = (blockrows *(MI));
4995 }
4996 else
4997 {
4998 /* same block row */
4999 }
5000 *elements_per_block_row += blockrows * blockcolumns;
5001 blocks_per_block_row[MI] +=1;
5002 ++K;
5003 }
5004 else
5005 if( MIndx[k] >= baserow+blockrows )
5006 {
5007 /* new row block, for sure */
5008 MI = MIndx[k]/blockrows;
5009 baserow = (blockrows *(MI));
5010
5011 if( mIndx[k] < basecolumn )
5012 {
5013 /* new row block, new block column */
5014 mI = mIndx[k]/blockcolumns;
5015 basecolumn = (blockcolumns*(mI));
5016 }
5017 else
5018 {
5019 /* new row block, same column */
5020 }
5021 /* get rid of this var : elements_per_block_row */
5022 *elements_per_block_row += blockrows * blockcolumns;
5023 blocks_per_block_row[MI] +=1;
5024 ++K;
5025 }
5026 else
5027 {
5028 /* same block row for sure */
5029 }
5030 ++k;
5031 }
5032 errval = RSB_ERR_NO_ERROR;goto ret;
5033 }
5034 errval = RSB_ERR_INTERNAL_ERROR;
5035 ret: return errval;
5036 }
5037
rsb__do_insert_sorted_optimized_css(struct rsb_mtx_t * mtxAp,const void * VA,const rsb_coo_idx_t * MIndx,const rsb_coo_idx_t * mIndx,const rsb_nnz_idx_t nnz)5038 rsb_err_t rsb__do_insert_sorted_optimized_css( struct rsb_mtx_t * mtxAp, const void *VA, const rsb_coo_idx_t * MIndx, const rsb_coo_idx_t * mIndx, const rsb_nnz_idx_t nnz)
5039 {
5040 /**
5041 \ingroup gr_internals
5042
5043 elements_per_block_row and blocks_per_block_row arrays should be blanked.
5044 FIXME : missing error handling.
5045 */
5046 rsb_err_t errval = RSB_ERR_NO_ERROR;
5047 rsb_nnz_idx_t n = 0;
5048
5049 /* in case of RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR, they are equal */
5050 if(mtxAp->VA != VA)
5051 rsb__memcpy(mtxAp->VA ,VA ,mtxAp->el_size*nnz);
5052
5053 for(n=0;n<nnz+1;++n)
5054 mtxAp->indptr[n] = n;
5055
5056 for(n=0;n<mtxAp->nnz;++n)
5057 mtxAp->bindx [n] = mIndx[n];
5058 mtxAp->bindx [nnz] = 0;
5059
5060 // should also set bindx, indptr,
5061 RSB_DO_ERR_RETURN(errval)
5062 }
5063
rsb__do_insert_sorted_optimized(struct rsb_mtx_t * mtxAp,const void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,const struct rsb_mtx_partitioning_info_t * pinfop)5064 rsb_err_t rsb__do_insert_sorted_optimized( struct rsb_mtx_t * mtxAp, const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop)
5065 {
5066 /*
5067 * FIXME ! UNFINISHED
5068 * and please note that linked format is incomplete, so it does not support well block column major
5069 */
5070 rsb_coo_idx_t blockrows = 0;
5071 rsb_coo_idx_t blockcolumns = 0;
5072 rsb_coo_idx_t baserow = 0;
5073 rsb_coo_idx_t basecolumn = 0;
5074 rsb_nnz_idx_t *indptr = mtxAp->indptr;
5075 rsb_nnz_idx_t *bindx = mtxAp->bindx;
5076 const rsb_coo_idx_t *Mpntr = NULL;
5077 const rsb_coo_idx_t *mpntr = NULL;
5078 const rsb_coo_idx_t *MIndx = NULL;
5079 const rsb_coo_idx_t *mIndx = NULL;
5080 rsb_blk_idx_t mI = 0, MI = 0;
5081 rsb_err_t errval = RSB_ERR_NO_ERROR;
5082 rsb_nnz_idx_t k = 0; /* will index a nnz sized array */
5083 rsb_nnz_idx_t K = 0;
5084
5085 if(nnz==0)
5086 {
5087 /* FIXME: new case, incomplete (useful for diagonal implicit matrices) */
5088 K = 0; /* if nnz == 0 then K == 0 */
5089 bindx[K] = 0; // the first element off the working bindx should be set to a safe value
5090 return RSB_ERR_NO_ERROR;
5091 }
5092
5093 if(0)
5094 return rsb__do_insert_sorted( mtxAp, VA, IA, JA, nnz, pinfop);
5095
5096 #if RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
5097 if(!pinfop)
5098 {
5099 /* a performance fix */
5100 if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
5101 return rsb__do_insert_sorted_optimized_css( mtxAp, VA, JA, IA, nnz );
5102 else
5103 return rsb__do_insert_sorted_optimized_css( mtxAp, VA, IA, JA, nnz );
5104 }
5105 #endif
5106
5107 if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
5108 {
5109 mpntr = pinfop->rpntr;
5110 Mpntr = pinfop->cpntr;
5111 mIndx = IA;
5112 MIndx = JA;
5113 }
5114 else
5115 {
5116 Mpntr = pinfop->rpntr;
5117 mpntr = pinfop->cpntr;
5118 MIndx = IA;
5119 mIndx = JA;
5120 }
5121
5122
5123 /* type double, storage BCOR */
5124 if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE )
5125 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
5126 {
5127 double * dst = mtxAp->VA;
5128 k = mI = MI = 0;K = 0;
5129 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5130 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5131 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5132 RSB_ASSERT( blockrows && blockcolumns);
5133 #else
5134 blockrows = Mpntr[MI+1] - Mpntr[MI];
5135 blockcolumns = mpntr[mI+1] - mpntr[mI];
5136 #endif
5137
5138 while( MIndx[k] >= Mpntr[MI+1] )++MI; /* skipping preceding block rows .. */
5139 while( mIndx[k] >= mpntr[mI+1] )++mI; /* skipping preceding block columns .. */
5140 baserow = Mpntr[MI];
5141 basecolumn = mpntr[mI];
5142 bindx [ K ] = mI; /* a new block */
5143 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns; /* FIXME : DUPLICATION ?! see later */
5144
5145
5146
5147 if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5148 {
5149 //RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5150 }
5151
5152 while(RSB_LIKELY(k<nnz))
5153 {
5154 #ifdef DEBUG
5155 if( MIndx[k] < baserow )
5156 {
5157 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5158 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5159 errval = RSB_ERR_INTERNAL_ERROR;
5160 goto err;/* NOTE : this jump could be evil */
5161 }
5162 #endif
5163
5164 if( mIndx[k] >= basecolumn+blockcolumns )
5165 {
5166 /* new block column, for sure */
5167
5168 while( mIndx[k] >= mpntr[mI+1] )++mI;
5169 blockcolumns = mpntr[mI+1] - mpntr[mI];
5170 basecolumn = mpntr[mI];
5171
5172 if( MIndx[k] >= baserow+blockrows )
5173 {
5174 /* new block row */
5175
5176 while( MIndx[k] >= Mpntr[MI+1] )++MI;
5177 blockrows = Mpntr[MI+1] - Mpntr[MI];
5178 baserow = Mpntr[MI];
5179 }
5180 else
5181 {
5182 /* same block row */
5183 }
5184 ++K;
5185 bindx [ K ] = mI; /* a new block */
5186 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5187 }
5188 else
5189 if( MIndx[k] >= baserow+blockrows )
5190 {
5191 /* new row block, for sure */
5192
5193 while( MIndx[k] >= Mpntr[MI+1] )++MI;
5194 blockrows = Mpntr[MI+1] - Mpntr[MI];
5195 baserow = Mpntr[MI];
5196
5197 if( mIndx[k] < basecolumn )
5198 {
5199 /* new row block, new block column */
5200 mI = 0;
5201
5202 while( mIndx[k] >= mpntr[mI+1] )++mI;
5203 blockcolumns = mpntr[mI+1] - mpntr[mI];
5204 basecolumn = mpntr[mI];
5205 }
5206 else
5207 {
5208 /* new row block, same column */
5209 }
5210 ++K;
5211 bindx [ K ] = mI; /* a new block */
5212 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5213 }
5214 else
5215 {
5216 /* same block row for sure */
5217 }
5218 dst = mtxAp->VA;
5219 RSB_DEBUG_ASSERT(mI>=0);
5220 RSB_DEBUG_ASSERT(MI>=0);
5221
5222
5223 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5224 dst +=
5225 indptr[ K ]
5226 /*K * blockrows * blockcolumns*/
5227 /*RSB_BLOCK_OFFSET(mtxAp,K)/mtxAp->el_size*/ /* FIXME : unfinished ! */
5228 ;
5229 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5230 dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5231 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5232 dst[0] = ((const double*)VA)[k];
5233 ++k;
5234 }
5235 if(nnz)++K; /* if nnz == 0 then K = 0 */
5236 bindx[K] = 0; // the first element off the working bindx should be set to a safe value
5237 return RSB_ERR_NO_ERROR; /* FIXME ! */
5238 }
5239 /* type float, storage BCOR */
5240 if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT )
5241 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
5242 {
5243 float * dst = mtxAp->VA;
5244 k = mI = MI = 0;K = 0;
5245 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5246 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5247 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5248 RSB_ASSERT( blockrows && blockcolumns);
5249 #else
5250 blockrows = Mpntr[MI+1] - Mpntr[MI];
5251 blockcolumns = mpntr[mI+1] - mpntr[mI];
5252 #endif
5253
5254 while( MIndx[k] >= Mpntr[MI+1] )++MI; /* skipping preceding block rows .. */
5255 while( mIndx[k] >= mpntr[mI+1] )++mI; /* skipping preceding block columns .. */
5256 baserow = Mpntr[MI];
5257 basecolumn = mpntr[mI];
5258 bindx [ K ] = mI; /* a new block */
5259 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns; /* FIXME : DUPLICATION ?! see later */
5260
5261
5262
5263 if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5264 {
5265 //RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5266 }
5267
5268 while(RSB_LIKELY(k<nnz))
5269 {
5270 #ifdef DEBUG
5271 if( MIndx[k] < baserow )
5272 {
5273 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5274 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5275 errval = RSB_ERR_INTERNAL_ERROR;
5276 goto err;/* NOTE : this jump could be evil */
5277 }
5278 #endif
5279
5280 if( mIndx[k] >= basecolumn+blockcolumns )
5281 {
5282 /* new block column, for sure */
5283
5284 while( mIndx[k] >= mpntr[mI+1] )++mI;
5285 blockcolumns = mpntr[mI+1] - mpntr[mI];
5286 basecolumn = mpntr[mI];
5287
5288 if( MIndx[k] >= baserow+blockrows )
5289 {
5290 /* new block row */
5291
5292 while( MIndx[k] >= Mpntr[MI+1] )++MI;
5293 blockrows = Mpntr[MI+1] - Mpntr[MI];
5294 baserow = Mpntr[MI];
5295 }
5296 else
5297 {
5298 /* same block row */
5299 }
5300 ++K;
5301 bindx [ K ] = mI; /* a new block */
5302 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5303 }
5304 else
5305 if( MIndx[k] >= baserow+blockrows )
5306 {
5307 /* new row block, for sure */
5308
5309 while( MIndx[k] >= Mpntr[MI+1] )++MI;
5310 blockrows = Mpntr[MI+1] - Mpntr[MI];
5311 baserow = Mpntr[MI];
5312
5313 if( mIndx[k] < basecolumn )
5314 {
5315 /* new row block, new block column */
5316 mI = 0;
5317
5318 while( mIndx[k] >= mpntr[mI+1] )++mI;
5319 blockcolumns = mpntr[mI+1] - mpntr[mI];
5320 basecolumn = mpntr[mI];
5321 }
5322 else
5323 {
5324 /* new row block, same column */
5325 }
5326 ++K;
5327 bindx [ K ] = mI; /* a new block */
5328 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5329 }
5330 else
5331 {
5332 /* same block row for sure */
5333 }
5334 dst = mtxAp->VA;
5335 RSB_DEBUG_ASSERT(mI>=0);
5336 RSB_DEBUG_ASSERT(MI>=0);
5337
5338
5339 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5340 dst +=
5341 indptr[ K ]
5342 /*K * blockrows * blockcolumns*/
5343 /*RSB_BLOCK_OFFSET(mtxAp,K)/mtxAp->el_size*/ /* FIXME : unfinished ! */
5344 ;
5345 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5346 dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5347 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5348 dst[0] = ((const float*)VA)[k];
5349 ++k;
5350 }
5351 if(nnz)++K; /* if nnz == 0 then K = 0 */
5352 bindx[K] = 0; // the first element off the working bindx should be set to a safe value
5353 return RSB_ERR_NO_ERROR; /* FIXME ! */
5354 }
5355 /* type float complex, storage BCOR */
5356 if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
5357 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
5358 {
5359 float complex * dst = mtxAp->VA;
5360 k = mI = MI = 0;K = 0;
5361 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5362 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5363 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5364 RSB_ASSERT( blockrows && blockcolumns);
5365 #else
5366 blockrows = Mpntr[MI+1] - Mpntr[MI];
5367 blockcolumns = mpntr[mI+1] - mpntr[mI];
5368 #endif
5369
5370 while( MIndx[k] >= Mpntr[MI+1] )++MI; /* skipping preceding block rows .. */
5371 while( mIndx[k] >= mpntr[mI+1] )++mI; /* skipping preceding block columns .. */
5372 baserow = Mpntr[MI];
5373 basecolumn = mpntr[mI];
5374 bindx [ K ] = mI; /* a new block */
5375 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns; /* FIXME : DUPLICATION ?! see later */
5376
5377
5378
5379 if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5380 {
5381 //RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5382 }
5383
5384 while(RSB_LIKELY(k<nnz))
5385 {
5386 #ifdef DEBUG
5387 if( MIndx[k] < baserow )
5388 {
5389 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5390 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5391 errval = RSB_ERR_INTERNAL_ERROR;
5392 goto err;/* NOTE : this jump could be evil */
5393 }
5394 #endif
5395
5396 if( mIndx[k] >= basecolumn+blockcolumns )
5397 {
5398 /* new block column, for sure */
5399
5400 while( mIndx[k] >= mpntr[mI+1] )++mI;
5401 blockcolumns = mpntr[mI+1] - mpntr[mI];
5402 basecolumn = mpntr[mI];
5403
5404 if( MIndx[k] >= baserow+blockrows )
5405 {
5406 /* new block row */
5407
5408 while( MIndx[k] >= Mpntr[MI+1] )++MI;
5409 blockrows = Mpntr[MI+1] - Mpntr[MI];
5410 baserow = Mpntr[MI];
5411 }
5412 else
5413 {
5414 /* same block row */
5415 }
5416 ++K;
5417 bindx [ K ] = mI; /* a new block */
5418 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5419 }
5420 else
5421 if( MIndx[k] >= baserow+blockrows )
5422 {
5423 /* new row block, for sure */
5424
5425 while( MIndx[k] >= Mpntr[MI+1] )++MI;
5426 blockrows = Mpntr[MI+1] - Mpntr[MI];
5427 baserow = Mpntr[MI];
5428
5429 if( mIndx[k] < basecolumn )
5430 {
5431 /* new row block, new block column */
5432 mI = 0;
5433
5434 while( mIndx[k] >= mpntr[mI+1] )++mI;
5435 blockcolumns = mpntr[mI+1] - mpntr[mI];
5436 basecolumn = mpntr[mI];
5437 }
5438 else
5439 {
5440 /* new row block, same column */
5441 }
5442 ++K;
5443 bindx [ K ] = mI; /* a new block */
5444 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5445 }
5446 else
5447 {
5448 /* same block row for sure */
5449 }
5450 dst = mtxAp->VA;
5451 RSB_DEBUG_ASSERT(mI>=0);
5452 RSB_DEBUG_ASSERT(MI>=0);
5453
5454
5455 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5456 dst +=
5457 indptr[ K ]
5458 /*K * blockrows * blockcolumns*/
5459 /*RSB_BLOCK_OFFSET(mtxAp,K)/mtxAp->el_size*/ /* FIXME : unfinished ! */
5460 ;
5461 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5462 dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5463 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5464 dst[0] = ((const float complex*)VA)[k];
5465 ++k;
5466 }
5467 if(nnz)++K; /* if nnz == 0 then K = 0 */
5468 bindx[K] = 0; // the first element off the working bindx should be set to a safe value
5469 return RSB_ERR_NO_ERROR; /* FIXME ! */
5470 }
5471 /* type double complex, storage BCOR */
5472 if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
5473 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCOR )
5474 {
5475 double complex * dst = mtxAp->VA;
5476 k = mI = MI = 0;K = 0;
5477 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5478 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5479 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5480 RSB_ASSERT( blockrows && blockcolumns);
5481 #else
5482 blockrows = Mpntr[MI+1] - Mpntr[MI];
5483 blockcolumns = mpntr[mI+1] - mpntr[mI];
5484 #endif
5485
5486 while( MIndx[k] >= Mpntr[MI+1] )++MI; /* skipping preceding block rows .. */
5487 while( mIndx[k] >= mpntr[mI+1] )++mI; /* skipping preceding block columns .. */
5488 baserow = Mpntr[MI];
5489 basecolumn = mpntr[mI];
5490 bindx [ K ] = mI; /* a new block */
5491 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns; /* FIXME : DUPLICATION ?! see later */
5492
5493
5494
5495 if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5496 {
5497 //RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5498 }
5499
5500 while(RSB_LIKELY(k<nnz))
5501 {
5502 #ifdef DEBUG
5503 if( MIndx[k] < baserow )
5504 {
5505 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5506 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5507 errval = RSB_ERR_INTERNAL_ERROR;
5508 goto err;/* NOTE : this jump could be evil */
5509 }
5510 #endif
5511
5512 if( mIndx[k] >= basecolumn+blockcolumns )
5513 {
5514 /* new block column, for sure */
5515
5516 while( mIndx[k] >= mpntr[mI+1] )++mI;
5517 blockcolumns = mpntr[mI+1] - mpntr[mI];
5518 basecolumn = mpntr[mI];
5519
5520 if( MIndx[k] >= baserow+blockrows )
5521 {
5522 /* new block row */
5523
5524 while( MIndx[k] >= Mpntr[MI+1] )++MI;
5525 blockrows = Mpntr[MI+1] - Mpntr[MI];
5526 baserow = Mpntr[MI];
5527 }
5528 else
5529 {
5530 /* same block row */
5531 }
5532 ++K;
5533 bindx [ K ] = mI; /* a new block */
5534 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5535 }
5536 else
5537 if( MIndx[k] >= baserow+blockrows )
5538 {
5539 /* new row block, for sure */
5540
5541 while( MIndx[k] >= Mpntr[MI+1] )++MI;
5542 blockrows = Mpntr[MI+1] - Mpntr[MI];
5543 baserow = Mpntr[MI];
5544
5545 if( mIndx[k] < basecolumn )
5546 {
5547 /* new row block, new block column */
5548 mI = 0;
5549
5550 while( mIndx[k] >= mpntr[mI+1] )++mI;
5551 blockcolumns = mpntr[mI+1] - mpntr[mI];
5552 basecolumn = mpntr[mI];
5553 }
5554 else
5555 {
5556 /* new row block, same column */
5557 }
5558 ++K;
5559 bindx [ K ] = mI; /* a new block */
5560 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5561 }
5562 else
5563 {
5564 /* same block row for sure */
5565 }
5566 dst = mtxAp->VA;
5567 RSB_DEBUG_ASSERT(mI>=0);
5568 RSB_DEBUG_ASSERT(MI>=0);
5569
5570
5571 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5572 dst +=
5573 indptr[ K ]
5574 /*K * blockrows * blockcolumns*/
5575 /*RSB_BLOCK_OFFSET(mtxAp,K)/mtxAp->el_size*/ /* FIXME : unfinished ! */
5576 ;
5577 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5578 dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5579 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5580 dst[0] = ((const double complex*)VA)[k];
5581 ++k;
5582 }
5583 if(nnz)++K; /* if nnz == 0 then K = 0 */
5584 bindx[K] = 0; // the first element off the working bindx should be set to a safe value
5585 return RSB_ERR_NO_ERROR; /* FIXME ! */
5586 }
5587 /* type double, storage BCSR */
5588 if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE )
5589 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
5590 {
5591 double * dst = mtxAp->VA;
5592 k = mI = MI = 0;K = 0;
5593 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5594 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5595 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5596 RSB_ASSERT( blockrows && blockcolumns);
5597 #else
5598 blockrows = Mpntr[MI+1] - Mpntr[MI];
5599 blockcolumns = mpntr[mI+1] - mpntr[mI];
5600 #endif
5601
5602 while( MIndx[k] >= (blockrows *(MI+1)) )++MI; /* skipping preceding block rows .. */
5603 while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI; /* skipping preceding block columns .. */
5604 baserow = (blockrows *(MI));
5605 basecolumn = (blockcolumns*(mI));
5606 bindx [ K ] = mI; /* a new block */
5607 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns; /* FIXME : DUPLICATION ?! see later */
5608
5609
5610
5611 if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5612 {
5613 //RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5614 }
5615
5616 while(RSB_LIKELY(k<nnz))
5617 {
5618 #ifdef DEBUG
5619 if( MIndx[k] < baserow )
5620 {
5621 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5622 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5623 errval = RSB_ERR_INTERNAL_ERROR;
5624 goto err;/* NOTE : this jump could be evil */
5625 }
5626 #endif
5627
5628 if( mIndx[k] >= basecolumn+blockcolumns )
5629 {
5630 /* new block column, for sure */
5631 mI = mIndx[k]/blockcolumns;
5632 basecolumn = (blockcolumns*(mI));
5633
5634 if( MIndx[k] >= baserow+blockrows )
5635 {
5636 /* new block row */
5637 MI = MIndx[k]/blockrows;
5638 baserow = (blockrows *(MI));
5639 }
5640 else
5641 {
5642 /* same block row */
5643 }
5644 ++K;
5645 bindx [ K ] = mI; /* a new block */
5646 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5647 }
5648 else
5649 if( MIndx[k] >= baserow+blockrows )
5650 {
5651 /* new row block, for sure */
5652 MI = MIndx[k]/blockrows;
5653 baserow = (blockrows *(MI));
5654
5655 if( mIndx[k] < basecolumn )
5656 {
5657 /* new row block, new block column */
5658 mI = 0;
5659 mI = mIndx[k]/blockcolumns;
5660 basecolumn = (blockcolumns*(mI));
5661 }
5662 else
5663 {
5664 /* new row block, same column */
5665 }
5666 ++K;
5667 bindx [ K ] = mI; /* a new block */
5668 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5669 }
5670 else
5671 {
5672 /* same block row for sure */
5673 }
5674 dst = mtxAp->VA;
5675 RSB_DEBUG_ASSERT(mI>=0);
5676 RSB_DEBUG_ASSERT(MI>=0);
5677
5678
5679 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5680 dst +=
5681 K * blockrows * blockcolumns
5682 ;
5683 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5684 dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5685 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5686 dst[0] = ((const double*)VA)[k];
5687 ++k;
5688 }
5689 if(nnz)++K; /* if nnz == 0 then K = 0 */
5690 bindx[K] = 0; // the first element off the working bindx should be set to a safe value
5691 return RSB_ERR_NO_ERROR; /* FIXME ! */
5692 }
5693 /* type float, storage BCSR */
5694 if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT )
5695 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
5696 {
5697 float * dst = mtxAp->VA;
5698 k = mI = MI = 0;K = 0;
5699 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5700 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5701 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5702 RSB_ASSERT( blockrows && blockcolumns);
5703 #else
5704 blockrows = Mpntr[MI+1] - Mpntr[MI];
5705 blockcolumns = mpntr[mI+1] - mpntr[mI];
5706 #endif
5707
5708 while( MIndx[k] >= (blockrows *(MI+1)) )++MI; /* skipping preceding block rows .. */
5709 while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI; /* skipping preceding block columns .. */
5710 baserow = (blockrows *(MI));
5711 basecolumn = (blockcolumns*(mI));
5712 bindx [ K ] = mI; /* a new block */
5713 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns; /* FIXME : DUPLICATION ?! see later */
5714
5715
5716
5717 if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5718 {
5719 //RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5720 }
5721
5722 while(RSB_LIKELY(k<nnz))
5723 {
5724 #ifdef DEBUG
5725 if( MIndx[k] < baserow )
5726 {
5727 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5728 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5729 errval = RSB_ERR_INTERNAL_ERROR;
5730 goto err;/* NOTE : this jump could be evil */
5731 }
5732 #endif
5733
5734 if( mIndx[k] >= basecolumn+blockcolumns )
5735 {
5736 /* new block column, for sure */
5737 mI = mIndx[k]/blockcolumns;
5738 basecolumn = (blockcolumns*(mI));
5739
5740 if( MIndx[k] >= baserow+blockrows )
5741 {
5742 /* new block row */
5743 MI = MIndx[k]/blockrows;
5744 baserow = (blockrows *(MI));
5745 }
5746 else
5747 {
5748 /* same block row */
5749 }
5750 ++K;
5751 bindx [ K ] = mI; /* a new block */
5752 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5753 }
5754 else
5755 if( MIndx[k] >= baserow+blockrows )
5756 {
5757 /* new row block, for sure */
5758 MI = MIndx[k]/blockrows;
5759 baserow = (blockrows *(MI));
5760
5761 if( mIndx[k] < basecolumn )
5762 {
5763 /* new row block, new block column */
5764 mI = 0;
5765 mI = mIndx[k]/blockcolumns;
5766 basecolumn = (blockcolumns*(mI));
5767 }
5768 else
5769 {
5770 /* new row block, same column */
5771 }
5772 ++K;
5773 bindx [ K ] = mI; /* a new block */
5774 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5775 }
5776 else
5777 {
5778 /* same block row for sure */
5779 }
5780 dst = mtxAp->VA;
5781 RSB_DEBUG_ASSERT(mI>=0);
5782 RSB_DEBUG_ASSERT(MI>=0);
5783
5784
5785 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5786 dst +=
5787 K * blockrows * blockcolumns
5788 ;
5789 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5790 dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5791 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5792 dst[0] = ((const float*)VA)[k];
5793 ++k;
5794 }
5795 if(nnz)++K; /* if nnz == 0 then K = 0 */
5796 bindx[K] = 0; // the first element off the working bindx should be set to a safe value
5797 return RSB_ERR_NO_ERROR; /* FIXME ! */
5798 }
5799 /* type float complex, storage BCSR */
5800 if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
5801 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
5802 {
5803 float complex * dst = mtxAp->VA;
5804 k = mI = MI = 0;K = 0;
5805 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5806 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5807 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5808 RSB_ASSERT( blockrows && blockcolumns);
5809 #else
5810 blockrows = Mpntr[MI+1] - Mpntr[MI];
5811 blockcolumns = mpntr[mI+1] - mpntr[mI];
5812 #endif
5813
5814 while( MIndx[k] >= (blockrows *(MI+1)) )++MI; /* skipping preceding block rows .. */
5815 while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI; /* skipping preceding block columns .. */
5816 baserow = (blockrows *(MI));
5817 basecolumn = (blockcolumns*(mI));
5818 bindx [ K ] = mI; /* a new block */
5819 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns; /* FIXME : DUPLICATION ?! see later */
5820
5821
5822
5823 if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5824 {
5825 //RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5826 }
5827
5828 while(RSB_LIKELY(k<nnz))
5829 {
5830 #ifdef DEBUG
5831 if( MIndx[k] < baserow )
5832 {
5833 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5834 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5835 errval = RSB_ERR_INTERNAL_ERROR;
5836 goto err;/* NOTE : this jump could be evil */
5837 }
5838 #endif
5839
5840 if( mIndx[k] >= basecolumn+blockcolumns )
5841 {
5842 /* new block column, for sure */
5843 mI = mIndx[k]/blockcolumns;
5844 basecolumn = (blockcolumns*(mI));
5845
5846 if( MIndx[k] >= baserow+blockrows )
5847 {
5848 /* new block row */
5849 MI = MIndx[k]/blockrows;
5850 baserow = (blockrows *(MI));
5851 }
5852 else
5853 {
5854 /* same block row */
5855 }
5856 ++K;
5857 bindx [ K ] = mI; /* a new block */
5858 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5859 }
5860 else
5861 if( MIndx[k] >= baserow+blockrows )
5862 {
5863 /* new row block, for sure */
5864 MI = MIndx[k]/blockrows;
5865 baserow = (blockrows *(MI));
5866
5867 if( mIndx[k] < basecolumn )
5868 {
5869 /* new row block, new block column */
5870 mI = 0;
5871 mI = mIndx[k]/blockcolumns;
5872 basecolumn = (blockcolumns*(mI));
5873 }
5874 else
5875 {
5876 /* new row block, same column */
5877 }
5878 ++K;
5879 bindx [ K ] = mI; /* a new block */
5880 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5881 }
5882 else
5883 {
5884 /* same block row for sure */
5885 }
5886 dst = mtxAp->VA;
5887 RSB_DEBUG_ASSERT(mI>=0);
5888 RSB_DEBUG_ASSERT(MI>=0);
5889
5890
5891 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5892 dst +=
5893 K * blockrows * blockcolumns
5894 ;
5895 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5896 dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
5897 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5898 dst[0] = ((const float complex*)VA)[k];
5899 ++k;
5900 }
5901 if(nnz)++K; /* if nnz == 0 then K = 0 */
5902 bindx[K] = 0; // the first element off the working bindx should be set to a safe value
5903 return RSB_ERR_NO_ERROR; /* FIXME ! */
5904 }
5905 /* type double complex, storage BCSR */
5906 if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
5907 if( mtxAp->matrix_storage==RSB_MATRIX_STORAGE_BCSR )
5908 {
5909 double complex * dst = mtxAp->VA;
5910 k = mI = MI = 0;K = 0;
5911 #if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
5912 /* rsb__get_blocking_size(mtxAp, &blockrows, &blockcolumns);*/
5913 rsb__get_physical_blocking_size(mtxAp, &blockrows, &blockcolumns);
5914 RSB_ASSERT( blockrows && blockcolumns);
5915 #else
5916 blockrows = Mpntr[MI+1] - Mpntr[MI];
5917 blockcolumns = mpntr[mI+1] - mpntr[mI];
5918 #endif
5919
5920 while( MIndx[k] >= (blockrows *(MI+1)) )++MI; /* skipping preceding block rows .. */
5921 while( mIndx[k] >= (blockcolumns*(mI+1)) )++mI; /* skipping preceding block columns .. */
5922 baserow = (blockrows *(MI));
5923 basecolumn = (blockcolumns*(mI));
5924 bindx [ K ] = mI; /* a new block */
5925 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns; /* FIXME : DUPLICATION ?! see later */
5926
5927
5928
5929 if( (mtxAp->flags & RSB_FLAG_SORTED_INPUT ) != 0 && 1 /* ONLY FOR 1 X 1 BLOCKED */)
5930 {
5931 //RSB_STDERR("rsb__do_insert_sorted_optimized : TODO : please specialize for specific blockings ! \n");
5932 }
5933
5934 while(RSB_LIKELY(k<nnz))
5935 {
5936 #ifdef DEBUG
5937 if( MIndx[k] < baserow )
5938 {
5939 RSB_ERROR("k=%zd : (%zd %zd) is not ok\n",k, (rsb_printf_int_t)(MIndx[k]+1),(rsb_printf_int_t)(mIndx[k]+1));
5940 RSB_STDERR("(minor dim. index %zd < base row %zd)\n",(rsb_printf_int_t)MIndx[k] , (rsb_printf_int_t)baserow);
5941 errval = RSB_ERR_INTERNAL_ERROR;
5942 goto err;/* NOTE : this jump could be evil */
5943 }
5944 #endif
5945
5946 if( mIndx[k] >= basecolumn+blockcolumns )
5947 {
5948 /* new block column, for sure */
5949 mI = mIndx[k]/blockcolumns;
5950 basecolumn = (blockcolumns*(mI));
5951
5952 if( MIndx[k] >= baserow+blockrows )
5953 {
5954 /* new block row */
5955 MI = MIndx[k]/blockrows;
5956 baserow = (blockrows *(MI));
5957 }
5958 else
5959 {
5960 /* same block row */
5961 }
5962 ++K;
5963 bindx [ K ] = mI; /* a new block */
5964 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5965 }
5966 else
5967 if( MIndx[k] >= baserow+blockrows )
5968 {
5969 /* new row block, for sure */
5970 MI = MIndx[k]/blockrows;
5971 baserow = (blockrows *(MI));
5972
5973 if( mIndx[k] < basecolumn )
5974 {
5975 /* new row block, new block column */
5976 mI = 0;
5977 mI = mIndx[k]/blockcolumns;
5978 basecolumn = (blockcolumns*(mI));
5979 }
5980 else
5981 {
5982 /* new row block, same column */
5983 }
5984 ++K;
5985 bindx [ K ] = mI; /* a new block */
5986 indptr[ K+1 ] = indptr[ K ] + blockrows * blockcolumns;
5987 }
5988 else
5989 {
5990 /* same block row for sure */
5991 }
5992 dst = mtxAp->VA;
5993 RSB_DEBUG_ASSERT(mI>=0);
5994 RSB_DEBUG_ASSERT(MI>=0);
5995
5996
5997 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
5998 dst +=
5999 K * blockrows * blockcolumns
6000 ;
6001 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
6002 dst += (MIndx[k]-baserow)*blockcolumns+(mIndx[k]-basecolumn);
6003 RSB_DEBUG_ASSERT(((rsb_byte_t*)dst)>=((rsb_byte_t*)mtxAp->VA));
6004 dst[0] = ((const double complex*)VA)[k];
6005 ++k;
6006 }
6007 if(nnz)++K; /* if nnz == 0 then K = 0 */
6008 bindx[K] = 0; // the first element off the working bindx should be set to a safe value
6009 return RSB_ERR_NO_ERROR; /* FIXME ! */
6010 }
6011 errval = RSB_ERR_INTERNAL_ERROR;
6012 return errval;
6013 }
6014
rsb__dump_block(rsb_type_t type,const void * VA,rsb_blk_idx_t roff,rsb_blk_idx_t coff,rsb_blk_idx_t rows,rsb_blk_idx_t cols)6015 rsb_err_t rsb__dump_block(rsb_type_t type, const void * VA, rsb_blk_idx_t roff, rsb_blk_idx_t coff, rsb_blk_idx_t rows, rsb_blk_idx_t cols )
6016 {
6017 /*!
6018 * Will dump to stdout a dense matrix.
6019 * Used for debugging purposes.
6020 *
6021 * FIXME : should be integrated with the macro subsystem in util.m4, and support column major order, and debugged.
6022 */
6023 #if RSB_ALLOW_STDOUT
6024 register rsb_coo_idx_t i, j;
6025
6026 if(RSB_BLK_MUL_OVERFLOW(rows,cols))
6027 return RSB_ERR_LIMITS;
6028
6029 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6030 if(type == RSB_NUMERICAL_TYPE_DOUBLE )
6031 {
6032 for(i=0;i<rows;++i)for(j=0;j<cols;++j)
6033 if(((double*)VA)[cols*i+j]!=((double)(0)) )
6034 { RSB_STDOUT(""
6035 "%zd"/* FIXME : this could be any index type! */
6036 "\t"
6037 "%zd"
6038 "\t"
6039 RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING
6040 "\n",(rsb_printf_int_t)(roff+i+1),(rsb_printf_int_t)(coff+j+1),
6041 ((double*)VA)[cols*i+j]);
6042 }
6043 return RSB_ERR_NO_ERROR;
6044 }
6045 #endif
6046 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6047 if(type == RSB_NUMERICAL_TYPE_FLOAT )
6048 {
6049 for(i=0;i<rows;++i)for(j=0;j<cols;++j)
6050 if(((float*)VA)[cols*i+j]!=((float)(0)) )
6051 { RSB_STDOUT(""
6052 "%zd"/* FIXME : this could be any index type! */
6053 "\t"
6054 "%zd"
6055 "\t"
6056 RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING
6057 "\n",(rsb_printf_int_t)(roff+i+1),(rsb_printf_int_t)(coff+j+1),
6058 ((float*)VA)[cols*i+j]);
6059 }
6060 return RSB_ERR_NO_ERROR;
6061 }
6062 #endif
6063 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6064 if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6065 {
6066 for(i=0;i<rows;++i)for(j=0;j<cols;++j)
6067 if(((float complex*)VA)[cols*i+j]!=((float complex)(0)) )
6068 { RSB_STDOUT(""
6069 "%zd"/* FIXME : this could be any index type! */
6070 "\t"
6071 "%zd"
6072 "\t"
6073 RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING
6074 "\n",(rsb_printf_int_t)(roff+i+1),(rsb_printf_int_t)(coff+j+1),
6075 crealf(((float complex*)VA)[cols*i+j]),cimagf(((float complex*)VA)[cols*i+j]));
6076 }
6077 return RSB_ERR_NO_ERROR;
6078 }
6079 #endif
6080 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6081 if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6082 {
6083 for(i=0;i<rows;++i)for(j=0;j<cols;++j)
6084 if(((double complex*)VA)[cols*i+j]!=((double complex)(0)) )
6085 { RSB_STDOUT(""
6086 "%zd"/* FIXME : this could be any index type! */
6087 "\t"
6088 "%zd"
6089 "\t"
6090 RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING
6091 "\n",(rsb_printf_int_t)(roff+i+1),(rsb_printf_int_t)(coff+j+1),
6092 creal(((double complex*)VA)[cols*i+j]),cimag(((double complex*)VA)[cols*i+j]));
6093 }
6094 return RSB_ERR_NO_ERROR;
6095 }
6096 #endif
6097 return RSB_ERR_UNSUPPORTED_TYPE ;
6098 #else
6099 return RSB_ERR_UNSUPPORTED_FEATURE;
6100 #endif
6101 }
6102
rsb__dump_blocks(const struct rsb_mtx_t * mtxAp)6103 rsb_err_t rsb__dump_blocks(const struct rsb_mtx_t *mtxAp)
6104 {
6105 return RSB_ERR_UNIMPLEMENTED_YET;
6106 #if 0
6107 /*!
6108 * \ingroup gr_internals
6109 * A debug function for printing out the matrix structure.
6110 *
6111 * FIXME : UNFINISHED
6112 * Note : it is extremely slow.
6113 **/
6114 rsb_blk_idx_t i,j;
6115 if(!mtxAp)return RSB_ERR_BADARGS;
6116 if(!mtxAp->options)return RSB_ERR_BADARGS;
6117
6118 RSB_STDERR("\t block structure :\n");
6119
6120 /* this prints out the matrix blocks nnz structure */
6121 for(i=0;i<mtxAp->M_b;++i)
6122 {
6123 for(j=0;j<mtxAp->K_b;++j)
6124 if((RSB_BITMAP_GET(mtxAp->options->bitmap,mtxAp->M_b,mtxAp->K_b,i,j)))
6125 {
6126 RSB_STDERR("1");
6127 }
6128 else
6129 {
6130 RSB_STDERR("0");
6131 }
6132 RSB_STDERR("\n");
6133 }
6134 return RSB_ERR_NO_ERROR;
6135 #endif
6136 }
6137
rsb__test_print_csr(rsb_type_t type,rsb_flags_t flags,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const void * VA,rsb_coo_idx_t rows,rsb_coo_idx_t cols,rsb_nnz_idx_t nnz,rsb_bool_t want_header,FILE * stream)6138 rsb_err_t rsb__test_print_csr(rsb_type_t type, rsb_flags_t flags, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const void * VA, rsb_coo_idx_t rows, rsb_coo_idx_t cols, rsb_nnz_idx_t nnz, rsb_bool_t want_header, FILE*stream)
6139 {
6140 /**
6141 * \ingroup gr_internals
6142 * Dumps out a whole matrix, from its CSR representation.
6143 *
6144 * Warning : the nonzeros should be sorted on input.
6145 */
6146 #if RSB_ALLOW_STDOUT
6147 rsb_coo_idx_t k;
6148 if( !stream )goto err;
6149 if( !IA )goto err;
6150 if( ( !JA || !VA ) && nnz>0 )goto err;
6151
6152 RSB_FPRINTF(stream,"%zd\n",(rsb_printf_int_t)rows);
6153 /* RSB_FPRINTF(stream,"%zd\n",(rsb_printf_int_t) nnz); */
6154 for(k=0;k<rows+1;++k) { RSB_FPRINTF(stream,"%zd\n",(rsb_printf_int_t)(IA[k]+1)); }
6155 for(k=0;k<nnz ;++k) { RSB_FPRINTF(stream,"%zd\n",(rsb_printf_int_t)(JA[k]+1)); }
6156 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6157 if(type == RSB_NUMERICAL_TYPE_DOUBLE )
6158 {
6159 for(k=0;k<nnz;++k)
6160 {
6161 RSB_FPRINTF(stream,
6162 RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING
6163 "\n"
6164 ,((double*)VA)[k]);
6165 }
6166 return RSB_ERR_NO_ERROR;
6167 }
6168 #endif
6169 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6170 if(type == RSB_NUMERICAL_TYPE_FLOAT )
6171 {
6172 for(k=0;k<nnz;++k)
6173 {
6174 RSB_FPRINTF(stream,
6175 RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING
6176 "\n"
6177 ,((float*)VA)[k]);
6178 }
6179 return RSB_ERR_NO_ERROR;
6180 }
6181 #endif
6182 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6183 if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6184 {
6185 for(k=0;k<nnz;++k)
6186 {
6187 RSB_FPRINTF(stream,
6188 RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING
6189 "\n"
6190 ,crealf(((float complex*)VA)[k]),cimagf(((float complex*)VA)[k]));
6191 }
6192 return RSB_ERR_NO_ERROR;
6193 }
6194 #endif
6195 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6196 if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6197 {
6198 for(k=0;k<nnz;++k)
6199 {
6200 RSB_FPRINTF(stream,
6201 RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING
6202 "\n"
6203 ,creal(((double complex*)VA)[k]),cimag(((double complex*)VA)[k]));
6204 }
6205 return RSB_ERR_NO_ERROR;
6206 }
6207 #endif
6208 err:
6209 return RSB_ERR_GENERIC_ERROR;
6210 #else
6211 return RSB_ERR_UNSUPPORTED_FEATURE;
6212 #endif
6213 }
6214
rsb__test_print_coo_mm(rsb_type_t type,rsb_flags_t flags,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const void * VA,rsb_coo_idx_t rows,rsb_coo_idx_t cols,rsb_nnz_idx_t nnz,rsb_bool_t want_header,FILE * stream)6215 rsb_err_t rsb__test_print_coo_mm(rsb_type_t type, rsb_flags_t flags, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const void * VA, rsb_coo_idx_t rows, rsb_coo_idx_t cols, rsb_nnz_idx_t nnz, rsb_bool_t want_header, FILE*stream)
6216 {
6217 /**
6218 * \ingroup gr_internals
6219 * Dumps out a whole matrix, from its coordinates, in matrix market format.
6220 *
6221 * Warning : the nonzeros should be sorted on input.
6222 */
6223 #if RSB_ALLOW_STDOUT
6224 rsb_coo_idx_t k;
6225 const char * ts = RSB_IS_MATRIX_TYPE_COMPLEX(type)?"complex":"real";
6226 const char * ss = RSB_SYMMETRY_STRING(flags);
6227
6228 if( !stream )
6229 {
6230 goto err;
6231 }
6232
6233 if( ( !IA || !JA || !VA ) && nnz > 0 )
6234 goto err;
6235 if( rows < 0 || cols < 0 || nnz < 0 )
6236 goto err;
6237
6238 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6239 if(type == RSB_NUMERICAL_TYPE_DOUBLE )
6240 {
6241 if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix coordinate %s %s\n%zd %zd %zd\n",ts,ss,(rsb_printf_int_t)rows,(rsb_printf_int_t)cols,(rsb_printf_int_t)nnz);
6242 /* for(k=0;k<nnz;++k) { RSB_FPRINTF(stream,"%6zd %6zd %20g\n",(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]); }*/
6243 for(k=0;k<nnz;++k)
6244 {
6245 RSB_FPRINTF(stream,
6246 "%zd"
6247 "\t"
6248 "%zd"
6249 "\t"
6250 RSB_MATRIX_STORAGE_DOUBLE_PRINTF_STRING
6251 "\n"
6252 ,(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((double*)VA)[k]);
6253 }
6254 return RSB_ERR_NO_ERROR;
6255 }
6256 #endif
6257 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6258 if(type == RSB_NUMERICAL_TYPE_FLOAT )
6259 {
6260 if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix coordinate %s %s\n%zd %zd %zd\n",ts,ss,(rsb_printf_int_t)rows,(rsb_printf_int_t)cols,(rsb_printf_int_t)nnz);
6261 /* for(k=0;k<nnz;++k) { RSB_FPRINTF(stream,"%6zd %6zd %20g\n",(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]); }*/
6262 for(k=0;k<nnz;++k)
6263 {
6264 RSB_FPRINTF(stream,
6265 "%zd"
6266 "\t"
6267 "%zd"
6268 "\t"
6269 RSB_MATRIX_STORAGE_FLOAT_PRINTF_STRING
6270 "\n"
6271 ,(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]);
6272 }
6273 return RSB_ERR_NO_ERROR;
6274 }
6275 #endif
6276 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6277 if(type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6278 {
6279 if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix coordinate %s %s\n%zd %zd %zd\n",ts,ss,(rsb_printf_int_t)rows,(rsb_printf_int_t)cols,(rsb_printf_int_t)nnz);
6280 /* for(k=0;k<nnz;++k) { RSB_FPRINTF(stream,"%6zd %6zd %20g\n",(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]); }*/
6281 for(k=0;k<nnz;++k)
6282 {
6283 RSB_FPRINTF(stream,
6284 "%zd"
6285 "\t"
6286 "%zd"
6287 "\t"
6288 RSB_MATRIX_STORAGE_FLOAT_COMPLEX_PRINTF_STRING
6289 "\n"
6290 ,(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),crealf(((float complex*)VA)[k]),cimagf(((float complex*)VA)[k]));
6291 }
6292 return RSB_ERR_NO_ERROR;
6293 }
6294 #endif
6295 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6296 if(type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6297 {
6298 if(want_header)RSB_FPRINTF(stream,"%%%%MatrixMarket matrix coordinate %s %s\n%zd %zd %zd\n",ts,ss,(rsb_printf_int_t)rows,(rsb_printf_int_t)cols,(rsb_printf_int_t)nnz);
6299 /* for(k=0;k<nnz;++k) { RSB_FPRINTF(stream,"%6zd %6zd %20g\n",(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),((float*)VA)[k]); }*/
6300 for(k=0;k<nnz;++k)
6301 {
6302 RSB_FPRINTF(stream,
6303 "%zd"
6304 "\t"
6305 "%zd"
6306 "\t"
6307 RSB_MATRIX_STORAGE_DOUBLE_COMPLEX_PRINTF_STRING
6308 "\n"
6309 ,(rsb_printf_int_t)(IA[k]+1),(rsb_printf_int_t)(JA[k]+1),creal(((double complex*)VA)[k]),cimag(((double complex*)VA)[k]));
6310 }
6311 return RSB_ERR_NO_ERROR;
6312 }
6313 #endif
6314 err:
6315 return RSB_ERR_GENERIC_ERROR;
6316 #else
6317 return RSB_ERR_UNSUPPORTED_FEATURE;
6318 #endif
6319 }
6320
rsb__do_sizeof(rsb_type_t type)6321 /*static*/ /*inline*/ size_t rsb__do_sizeof(rsb_type_t type) {
6322 /*
6323 * FIXME : UNUSED ?
6324 */
6325 size_t so = 0;
6326 switch(type)
6327 {
6328 /* supported (double,float,float complex,double complex) */
6329 case RSB_NUMERICAL_TYPE_DOUBLE :
6330 so = sizeof(double);
6331 break;
6332 case RSB_NUMERICAL_TYPE_FLOAT :
6333 so = sizeof(float);
6334 break;
6335 case RSB_NUMERICAL_TYPE_FLOAT_COMPLEX :
6336 so = sizeof(float complex);
6337 break;
6338 case RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX :
6339 so = sizeof(double complex);
6340 break;
6341 /* unsupported type */
6342 default :
6343 RSB_NULL_STATEMENT_FOR_COMPILER_HAPPINESS
6344 }
6345 return so;
6346 }
6347
rsb__do_coo_sum(struct rsb_coo_matrix_t * coocp,const void * alphap,const struct rsb_coo_matrix_t * cooap,const void * betap,const struct rsb_coo_matrix_t * coobp)6348 rsb_err_t rsb__do_coo_sum( struct rsb_coo_matrix_t*coocp, const void *alphap, const struct rsb_coo_matrix_t*cooap, const void *betap, const struct rsb_coo_matrix_t*coobp)
6349 {
6350 struct rsb_coo_matrix_t cooa = *cooap, coob = *coobp, cooc = *coocp;
6351 rsb_nnz_idx_t /*rnz = 0,*/an, bn, cn;
6352
6353 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6354 if(cooa.typecode == RSB_NUMERICAL_TYPE_DOUBLE )
6355 {
6356 double alpha = alphap?*(double*)alphap:((double)(1.0));
6357 double beta = betap ?*(double*)betap :((double)(1.0));
6358 for(cn = 0, an = 0, bn = 0;an<cooa.nnz || bn<coob.nnz;)
6359 {
6360 rsb_nnz_idx_t ap = an, bp = bn;
6361 if(cooa.IA[an]==coob.IA[bn] && cooa.JA[an]==coob.JA[bn])
6362 cooc.IA[cn] = cooa.IA[an],cooc.JA[cn] = cooa.JA[an],
6363 ((double*)cooc.VA)[cn] = alpha * ((double*)cooa.VA)[an] + beta * ((double*)coob.VA)[bn],
6364 ap = an, bp = bn, ++cn, ++an, ++bn;
6365
6366 for(;an<cooa.nnz && cooa.IA[an]==cooa.IA[ap] && cooa.JA[an]==cooa.JA[ap] ;++an)
6367 //RSB_STDOUT("x> %d %d\n",cooa.IA[an],cooa.JA[an])
6368 ((double*)cooc.VA)[cn] += alpha * ((double*)cooa.VA)[an];
6369
6370 for(;bn<coob.nnz && coob.IA[bn]==coob.IA[bp] && coob.JA[bn]==coob.JA[bp] ;++bn)
6371 //RSB_STDOUT("x> %d %d\n",coob.IA[bn],coob.JA[bn])
6372 ((double*)cooc.VA)[cn] += beta * ((double*)coob.VA)[bn];
6373
6374 if( bn<coob.nnz )
6375 for(;an<cooa.nnz && (cooa.IA[an]<coob.IA[bn] ||
6376 (cooa.IA[an] <= coob.IA[bn] && cooa.JA[an]<coob.JA[bn]))
6377 ;++an)
6378 //RSB_STDOUT("-> %d %d\n",cooa.IA[an],cooa.JA[an]),
6379 cooc.IA[cn] = cooa.IA[an], cooc.JA[cn] = cooa.JA[an],
6380 ((double*)cooc.VA)[cn] = alpha * ((double*)cooa.VA)[an],
6381 ++cn;
6382
6383 if( an<cooa.nnz )
6384 for(;bn<coob.nnz && (cooa.IA[an]>coob.IA[bn] ||
6385 (cooa.IA[an]>=coob.IA[bn] && cooa.JA[an]>coob.JA[bn]))
6386 ;++bn)
6387 // RSB_STDOUT("-> %d %d\n",coob.IA[bn],coob.JA[bn]),
6388 cooc.IA[cn] = coob.IA[bn],cooc.JA[cn] = coob.JA[bn],
6389 ((double*)cooc.VA)[cn] = beta * ((double*)coob.VA)[bn],
6390 ++cn;
6391 //RSB_STDOUT("? %d %d\n",an,bn);
6392 }
6393 }
6394 else
6395 #endif
6396 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6397 if(cooa.typecode == RSB_NUMERICAL_TYPE_FLOAT )
6398 {
6399 float alpha = alphap?*(float*)alphap:((float)(1.0));
6400 float beta = betap ?*(float*)betap :((float)(1.0));
6401 for(cn = 0, an = 0, bn = 0;an<cooa.nnz || bn<coob.nnz;)
6402 {
6403 rsb_nnz_idx_t ap = an, bp = bn;
6404 if(cooa.IA[an]==coob.IA[bn] && cooa.JA[an]==coob.JA[bn])
6405 cooc.IA[cn] = cooa.IA[an],cooc.JA[cn] = cooa.JA[an],
6406 ((float*)cooc.VA)[cn] = alpha * ((float*)cooa.VA)[an] + beta * ((float*)coob.VA)[bn],
6407 ap = an, bp = bn, ++cn, ++an, ++bn;
6408
6409 for(;an<cooa.nnz && cooa.IA[an]==cooa.IA[ap] && cooa.JA[an]==cooa.JA[ap] ;++an)
6410 //RSB_STDOUT("x> %d %d\n",cooa.IA[an],cooa.JA[an])
6411 ((float*)cooc.VA)[cn] += alpha * ((float*)cooa.VA)[an];
6412
6413 for(;bn<coob.nnz && coob.IA[bn]==coob.IA[bp] && coob.JA[bn]==coob.JA[bp] ;++bn)
6414 //RSB_STDOUT("x> %d %d\n",coob.IA[bn],coob.JA[bn])
6415 ((float*)cooc.VA)[cn] += beta * ((float*)coob.VA)[bn];
6416
6417 if( bn<coob.nnz )
6418 for(;an<cooa.nnz && (cooa.IA[an]<coob.IA[bn] ||
6419 (cooa.IA[an] <= coob.IA[bn] && cooa.JA[an]<coob.JA[bn]))
6420 ;++an)
6421 //RSB_STDOUT("-> %d %d\n",cooa.IA[an],cooa.JA[an]),
6422 cooc.IA[cn] = cooa.IA[an], cooc.JA[cn] = cooa.JA[an],
6423 ((float*)cooc.VA)[cn] = alpha * ((float*)cooa.VA)[an],
6424 ++cn;
6425
6426 if( an<cooa.nnz )
6427 for(;bn<coob.nnz && (cooa.IA[an]>coob.IA[bn] ||
6428 (cooa.IA[an]>=coob.IA[bn] && cooa.JA[an]>coob.JA[bn]))
6429 ;++bn)
6430 // RSB_STDOUT("-> %d %d\n",coob.IA[bn],coob.JA[bn]),
6431 cooc.IA[cn] = coob.IA[bn],cooc.JA[cn] = coob.JA[bn],
6432 ((float*)cooc.VA)[cn] = beta * ((float*)coob.VA)[bn],
6433 ++cn;
6434 //RSB_STDOUT("? %d %d\n",an,bn);
6435 }
6436 }
6437 else
6438 #endif
6439 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6440 if(cooa.typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6441 {
6442 float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
6443 float complex beta = betap ?*(float complex*)betap :((float complex)(1.0));
6444 for(cn = 0, an = 0, bn = 0;an<cooa.nnz || bn<coob.nnz;)
6445 {
6446 rsb_nnz_idx_t ap = an, bp = bn;
6447 if(cooa.IA[an]==coob.IA[bn] && cooa.JA[an]==coob.JA[bn])
6448 cooc.IA[cn] = cooa.IA[an],cooc.JA[cn] = cooa.JA[an],
6449 ((float complex*)cooc.VA)[cn] = alpha * ((float complex*)cooa.VA)[an] + beta * ((float complex*)coob.VA)[bn],
6450 ap = an, bp = bn, ++cn, ++an, ++bn;
6451
6452 for(;an<cooa.nnz && cooa.IA[an]==cooa.IA[ap] && cooa.JA[an]==cooa.JA[ap] ;++an)
6453 //RSB_STDOUT("x> %d %d\n",cooa.IA[an],cooa.JA[an])
6454 ((float complex*)cooc.VA)[cn] += alpha * ((float complex*)cooa.VA)[an];
6455
6456 for(;bn<coob.nnz && coob.IA[bn]==coob.IA[bp] && coob.JA[bn]==coob.JA[bp] ;++bn)
6457 //RSB_STDOUT("x> %d %d\n",coob.IA[bn],coob.JA[bn])
6458 ((float complex*)cooc.VA)[cn] += beta * ((float complex*)coob.VA)[bn];
6459
6460 if( bn<coob.nnz )
6461 for(;an<cooa.nnz && (cooa.IA[an]<coob.IA[bn] ||
6462 (cooa.IA[an] <= coob.IA[bn] && cooa.JA[an]<coob.JA[bn]))
6463 ;++an)
6464 //RSB_STDOUT("-> %d %d\n",cooa.IA[an],cooa.JA[an]),
6465 cooc.IA[cn] = cooa.IA[an], cooc.JA[cn] = cooa.JA[an],
6466 ((float complex*)cooc.VA)[cn] = alpha * ((float complex*)cooa.VA)[an],
6467 ++cn;
6468
6469 if( an<cooa.nnz )
6470 for(;bn<coob.nnz && (cooa.IA[an]>coob.IA[bn] ||
6471 (cooa.IA[an]>=coob.IA[bn] && cooa.JA[an]>coob.JA[bn]))
6472 ;++bn)
6473 // RSB_STDOUT("-> %d %d\n",coob.IA[bn],coob.JA[bn]),
6474 cooc.IA[cn] = coob.IA[bn],cooc.JA[cn] = coob.JA[bn],
6475 ((float complex*)cooc.VA)[cn] = beta * ((float complex*)coob.VA)[bn],
6476 ++cn;
6477 //RSB_STDOUT("? %d %d\n",an,bn);
6478 }
6479 }
6480 else
6481 #endif
6482 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6483 if(cooa.typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6484 {
6485 double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
6486 double complex beta = betap ?*(double complex*)betap :((double complex)(1.0));
6487 for(cn = 0, an = 0, bn = 0;an<cooa.nnz || bn<coob.nnz;)
6488 {
6489 rsb_nnz_idx_t ap = an, bp = bn;
6490 if(cooa.IA[an]==coob.IA[bn] && cooa.JA[an]==coob.JA[bn])
6491 cooc.IA[cn] = cooa.IA[an],cooc.JA[cn] = cooa.JA[an],
6492 ((double complex*)cooc.VA)[cn] = alpha * ((double complex*)cooa.VA)[an] + beta * ((double complex*)coob.VA)[bn],
6493 ap = an, bp = bn, ++cn, ++an, ++bn;
6494
6495 for(;an<cooa.nnz && cooa.IA[an]==cooa.IA[ap] && cooa.JA[an]==cooa.JA[ap] ;++an)
6496 //RSB_STDOUT("x> %d %d\n",cooa.IA[an],cooa.JA[an])
6497 ((double complex*)cooc.VA)[cn] += alpha * ((double complex*)cooa.VA)[an];
6498
6499 for(;bn<coob.nnz && coob.IA[bn]==coob.IA[bp] && coob.JA[bn]==coob.JA[bp] ;++bn)
6500 //RSB_STDOUT("x> %d %d\n",coob.IA[bn],coob.JA[bn])
6501 ((double complex*)cooc.VA)[cn] += beta * ((double complex*)coob.VA)[bn];
6502
6503 if( bn<coob.nnz )
6504 for(;an<cooa.nnz && (cooa.IA[an]<coob.IA[bn] ||
6505 (cooa.IA[an] <= coob.IA[bn] && cooa.JA[an]<coob.JA[bn]))
6506 ;++an)
6507 //RSB_STDOUT("-> %d %d\n",cooa.IA[an],cooa.JA[an]),
6508 cooc.IA[cn] = cooa.IA[an], cooc.JA[cn] = cooa.JA[an],
6509 ((double complex*)cooc.VA)[cn] = alpha * ((double complex*)cooa.VA)[an],
6510 ++cn;
6511
6512 if( an<cooa.nnz )
6513 for(;bn<coob.nnz && (cooa.IA[an]>coob.IA[bn] ||
6514 (cooa.IA[an]>=coob.IA[bn] && cooa.JA[an]>coob.JA[bn]))
6515 ;++bn)
6516 // RSB_STDOUT("-> %d %d\n",coob.IA[bn],coob.JA[bn]),
6517 cooc.IA[cn] = coob.IA[bn],cooc.JA[cn] = coob.JA[bn],
6518 ((double complex*)cooc.VA)[cn] = beta * ((double complex*)coob.VA)[bn],
6519 ++cn;
6520 //RSB_STDOUT("? %d %d\n",an,bn);
6521 }
6522 }
6523 else
6524 #endif
6525 return RSB_ERR_UNSUPPORTED_TYPE ;
6526 return RSB_ERR_NO_ERROR;
6527 }
6528
rsb__cor_merge_dups(rsb_type_t typecode,void * RSB_RESTRICT VA,rsb_coo_idx_t * RSB_RESTRICT IA,rsb_coo_idx_t * RSB_RESTRICT JA,rsb_nnz_idx_t offB,rsb_nnz_idx_t nnzB,rsb_nnz_idx_t nnzC,const int wv,int wp,rsb_nnz_idx_t * onzp,struct rsb_coo_matrix_t * RSB_RESTRICT coop)6529 rsb_err_t rsb__cor_merge_dups(rsb_type_t typecode, void* RSB_RESTRICT VA, rsb_coo_idx_t * RSB_RESTRICT IA, rsb_coo_idx_t * RSB_RESTRICT JA, rsb_nnz_idx_t offB, rsb_nnz_idx_t nnzB, rsb_nnz_idx_t nnzC, const int wv, int wp, rsb_nnz_idx_t *onzp, struct rsb_coo_matrix_t*RSB_RESTRICT coop)
6530 {
6531 /**
6532 See rsb__cor_merge.
6533 */
6534 rsb_err_t errval = RSB_ERR_NO_ERROR;
6535 void *VB = NULL, *VC = NULL, *VT = NULL;
6536 rsb_coo_idx_t * IB = NULL, *JB = NULL;
6537 rsb_coo_idx_t * IC = NULL, *JC = NULL;
6538 rsb_coo_idx_t * IT = NULL, *JT = NULL;
6539 rsb_nnz_idx_t bi = 0, ci = 0, ti = 0;
6540 rsb_nnz_idx_t b0 = 0, c0 = 0, t0 = 0;
6541 rsb_nnz_idx_t onz = 0;
6542 struct rsb_coo_matrix_t coo;
6543 size_t es = RSB_SIZEOF(typecode);
6544
6545 if( nnzB == 0 || nnzC == 0 )
6546 {
6547 goto ret;
6548 }
6549
6550 b0 = offB;
6551 c0 = offB + nnzB;
6552 VB = RSB_TYPED_OFF_PTR(typecode,VA,b0);
6553 VC = RSB_TYPED_OFF_PTR(typecode,VA,c0);
6554 IB = IA + b0;
6555 IC = IA + c0;
6556 JB = JA + b0;
6557 JC = JA + c0;
6558
6559 RSB_BZERO_P(&coo);
6560 coo.nnz = nnzB + nnzC;
6561 coo.typecode = typecode;
6562
6563 if( coop && coop->nnz)
6564 {
6565 coo = *coop;
6566 coo.nnz = nnzB + nnzC; /* necessary */
6567 }
6568 else
6569 {
6570 if( NULL == rsb__allocate_coo_matrix_t(&coo) )
6571 goto err;
6572 }
6573
6574 IT = coo.IA;
6575 JT = coo.JA;
6576 VT = coo.VA;
6577
6578 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
6579 if(typecode == RSB_NUMERICAL_TYPE_DOUBLE )
6580 {
6581 double * vT = VT;
6582 double * vB = VB;
6583 double * vC = VC;
6584
6585 again_double:
6586 t0 = ti;
6587
6588 if ( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6589 {
6590 IT[ti] = IB[bi];
6591 JT[ti] = JB[bi];
6592 vT[ti] = vB[bi];
6593 ++bi,++ti;
6594 }
6595
6596 while( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6597 {
6598 --ti;
6599 vT[ti] += vB[bi];
6600 ++bi;
6601 ++ti;
6602 ++onz;
6603 }
6604
6605 /* FIXME: this works as RSB_FLAG_DUPLICATES_SUM but should support either merge, last, first, ... */
6606 t0 = ti;
6607 if ( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) )
6608 {
6609 IT[ti] = IB[bi];
6610 JT[ti] = JB[bi];
6611 vT[ti] = vB[bi] + vC[ci];
6612 ++bi,++ci,++ti;
6613 ++onz;
6614 }
6615
6616 while( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6617 {
6618 --ti;
6619 vT[ti] += vB[bi] + vC[ci];
6620 ++bi;
6621 ++ci;
6622 ++ti;
6623 ++onz;
6624 }
6625
6626 t0 = ti;
6627 if ( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6628 {
6629 IT[ti] = IC[ci];
6630 JT[ti] = JC[ci];
6631 vT[ti] = vC[ci];
6632 ++ci,++ti;
6633 }
6634
6635 while( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6636 {
6637 --ti;
6638 vT[ti] += vC[ci];
6639 ++ci;
6640 ++ti;
6641 ++onz;
6642 }
6643
6644 if( ci < nnzC && bi < nnzB )
6645 goto again_double;
6646
6647 again_once_double:
6648
6649 if ( bi<nnzB && ci==nnzC )
6650 {
6651 IT[ti] = IB[bi];
6652 JT[ti] = JB[bi];
6653 vT[ti] = vB[bi];
6654 ++bi,++ti;
6655 }
6656
6657 while( bi<nnzB && ci==nnzC && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6658 {
6659 --ti;
6660 vT[ti] += vB[bi];
6661 ++bi;
6662 ++ti;
6663 ++onz;
6664 }
6665
6666 if ( ci<nnzC && bi==nnzB )
6667 {
6668 IT[ti] = IC[ci];
6669 JT[ti] = JC[ci];
6670 vT[ti] = vC[ci];
6671 ++ci,++ti;
6672 }
6673
6674 while( ci<nnzC && bi==nnzB && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6675 {
6676 --ti;
6677 vT[ti]+= vC[ci];
6678 ++ci;
6679 ++ti;
6680 ++onz;
6681 }
6682
6683 if( ci < nnzC || bi < nnzB )
6684 goto again_once_double;
6685 }
6686 else
6687 #endif
6688 #ifdef RSB_NUMERICAL_TYPE_FLOAT
6689 if(typecode == RSB_NUMERICAL_TYPE_FLOAT )
6690 {
6691 float * vT = VT;
6692 float * vB = VB;
6693 float * vC = VC;
6694
6695 again_float:
6696 t0 = ti;
6697
6698 if ( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6699 {
6700 IT[ti] = IB[bi];
6701 JT[ti] = JB[bi];
6702 vT[ti] = vB[bi];
6703 ++bi,++ti;
6704 }
6705
6706 while( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6707 {
6708 --ti;
6709 vT[ti] += vB[bi];
6710 ++bi;
6711 ++ti;
6712 ++onz;
6713 }
6714
6715 /* FIXME: this works as RSB_FLAG_DUPLICATES_SUM but should support either merge, last, first, ... */
6716 t0 = ti;
6717 if ( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) )
6718 {
6719 IT[ti] = IB[bi];
6720 JT[ti] = JB[bi];
6721 vT[ti] = vB[bi] + vC[ci];
6722 ++bi,++ci,++ti;
6723 ++onz;
6724 }
6725
6726 while( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6727 {
6728 --ti;
6729 vT[ti] += vB[bi] + vC[ci];
6730 ++bi;
6731 ++ci;
6732 ++ti;
6733 ++onz;
6734 }
6735
6736 t0 = ti;
6737 if ( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6738 {
6739 IT[ti] = IC[ci];
6740 JT[ti] = JC[ci];
6741 vT[ti] = vC[ci];
6742 ++ci,++ti;
6743 }
6744
6745 while( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6746 {
6747 --ti;
6748 vT[ti] += vC[ci];
6749 ++ci;
6750 ++ti;
6751 ++onz;
6752 }
6753
6754 if( ci < nnzC && bi < nnzB )
6755 goto again_float;
6756
6757 again_once_float:
6758
6759 if ( bi<nnzB && ci==nnzC )
6760 {
6761 IT[ti] = IB[bi];
6762 JT[ti] = JB[bi];
6763 vT[ti] = vB[bi];
6764 ++bi,++ti;
6765 }
6766
6767 while( bi<nnzB && ci==nnzC && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6768 {
6769 --ti;
6770 vT[ti] += vB[bi];
6771 ++bi;
6772 ++ti;
6773 ++onz;
6774 }
6775
6776 if ( ci<nnzC && bi==nnzB )
6777 {
6778 IT[ti] = IC[ci];
6779 JT[ti] = JC[ci];
6780 vT[ti] = vC[ci];
6781 ++ci,++ti;
6782 }
6783
6784 while( ci<nnzC && bi==nnzB && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6785 {
6786 --ti;
6787 vT[ti]+= vC[ci];
6788 ++ci;
6789 ++ti;
6790 ++onz;
6791 }
6792
6793 if( ci < nnzC || bi < nnzB )
6794 goto again_once_float;
6795 }
6796 else
6797 #endif
6798 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
6799 if(typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
6800 {
6801 float complex * vT = VT;
6802 float complex * vB = VB;
6803 float complex * vC = VC;
6804
6805 again_float_complex:
6806 t0 = ti;
6807
6808 if ( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6809 {
6810 IT[ti] = IB[bi];
6811 JT[ti] = JB[bi];
6812 vT[ti] = vB[bi];
6813 ++bi,++ti;
6814 }
6815
6816 while( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6817 {
6818 --ti;
6819 vT[ti] += vB[bi];
6820 ++bi;
6821 ++ti;
6822 ++onz;
6823 }
6824
6825 /* FIXME: this works as RSB_FLAG_DUPLICATES_SUM but should support either merge, last, first, ... */
6826 t0 = ti;
6827 if ( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) )
6828 {
6829 IT[ti] = IB[bi];
6830 JT[ti] = JB[bi];
6831 vT[ti] = vB[bi] + vC[ci];
6832 ++bi,++ci,++ti;
6833 ++onz;
6834 }
6835
6836 while( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6837 {
6838 --ti;
6839 vT[ti] += vB[bi] + vC[ci];
6840 ++bi;
6841 ++ci;
6842 ++ti;
6843 ++onz;
6844 }
6845
6846 t0 = ti;
6847 if ( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6848 {
6849 IT[ti] = IC[ci];
6850 JT[ti] = JC[ci];
6851 vT[ti] = vC[ci];
6852 ++ci,++ti;
6853 }
6854
6855 while( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6856 {
6857 --ti;
6858 vT[ti] += vC[ci];
6859 ++ci;
6860 ++ti;
6861 ++onz;
6862 }
6863
6864 if( ci < nnzC && bi < nnzB )
6865 goto again_float_complex;
6866
6867 again_once_float_complex:
6868
6869 if ( bi<nnzB && ci==nnzC )
6870 {
6871 IT[ti] = IB[bi];
6872 JT[ti] = JB[bi];
6873 vT[ti] = vB[bi];
6874 ++bi,++ti;
6875 }
6876
6877 while( bi<nnzB && ci==nnzC && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6878 {
6879 --ti;
6880 vT[ti] += vB[bi];
6881 ++bi;
6882 ++ti;
6883 ++onz;
6884 }
6885
6886 if ( ci<nnzC && bi==nnzB )
6887 {
6888 IT[ti] = IC[ci];
6889 JT[ti] = JC[ci];
6890 vT[ti] = vC[ci];
6891 ++ci,++ti;
6892 }
6893
6894 while( ci<nnzC && bi==nnzB && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6895 {
6896 --ti;
6897 vT[ti]+= vC[ci];
6898 ++ci;
6899 ++ti;
6900 ++onz;
6901 }
6902
6903 if( ci < nnzC || bi < nnzB )
6904 goto again_once_float_complex;
6905 }
6906 else
6907 #endif
6908 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
6909 if(typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
6910 {
6911 double complex * vT = VT;
6912 double complex * vB = VB;
6913 double complex * vC = VC;
6914
6915 again_double_complex:
6916 t0 = ti;
6917
6918 if ( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6919 {
6920 IT[ti] = IB[bi];
6921 JT[ti] = JB[bi];
6922 vT[ti] = vB[bi];
6923 ++bi,++ti;
6924 }
6925
6926 while( bi<nnzB && ci<nnzC && RSB_COO_LT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6927 {
6928 --ti;
6929 vT[ti] += vB[bi];
6930 ++bi;
6931 ++ti;
6932 ++onz;
6933 }
6934
6935 /* FIXME: this works as RSB_FLAG_DUPLICATES_SUM but should support either merge, last, first, ... */
6936 t0 = ti;
6937 if ( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) )
6938 {
6939 IT[ti] = IB[bi];
6940 JT[ti] = JB[bi];
6941 vT[ti] = vB[bi] + vC[ci];
6942 ++bi,++ci,++ti;
6943 ++onz;
6944 }
6945
6946 while( bi<nnzB && ci<nnzC && RSB_COO_EQ(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6947 {
6948 --ti;
6949 vT[ti] += vB[bi] + vC[ci];
6950 ++bi;
6951 ++ci;
6952 ++ti;
6953 ++onz;
6954 }
6955
6956 t0 = ti;
6957 if ( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) )
6958 {
6959 IT[ti] = IC[ci];
6960 JT[ti] = JC[ci];
6961 vT[ti] = vC[ci];
6962 ++ci,++ti;
6963 }
6964
6965 while( bi<nnzB && ci<nnzC && RSB_COO_GT(IB[bi],JB[bi],IC[ci],JC[ci]) && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
6966 {
6967 --ti;
6968 vT[ti] += vC[ci];
6969 ++ci;
6970 ++ti;
6971 ++onz;
6972 }
6973
6974 if( ci < nnzC && bi < nnzB )
6975 goto again_double_complex;
6976
6977 again_once_double_complex:
6978
6979 if ( bi<nnzB && ci==nnzC )
6980 {
6981 IT[ti] = IB[bi];
6982 JT[ti] = JB[bi];
6983 vT[ti] = vB[bi];
6984 ++bi,++ti;
6985 }
6986
6987 while( bi<nnzB && ci==nnzC && ti > 0 && RSB_COO_EQ(IB[bi],JB[bi],IT[ti-1],JT[ti-1]) )
6988 {
6989 --ti;
6990 vT[ti] += vB[bi];
6991 ++bi;
6992 ++ti;
6993 ++onz;
6994 }
6995
6996 if ( ci<nnzC && bi==nnzB )
6997 {
6998 IT[ti] = IC[ci];
6999 JT[ti] = JC[ci];
7000 vT[ti] = vC[ci];
7001 ++ci,++ti;
7002 }
7003
7004 while( ci<nnzC && bi==nnzB && ti > 0 && RSB_COO_EQ(IC[ci],JC[ci],IT[ti-1],JT[ti-1]) )
7005 {
7006 --ti;
7007 vT[ti]+= vC[ci];
7008 ++ci;
7009 ++ti;
7010 ++onz;
7011 }
7012
7013 if( ci < nnzC || bi < nnzB )
7014 goto again_once_double_complex;
7015 }
7016 else
7017 #endif
7018 errval = RSB_ERR_INTERNAL_ERROR;
7019
7020 coo.nnz -= onz;
7021 RSB_COA_MEMCPY(IA,IT,offB,0,(coo.nnz));
7022 RSB_COA_MEMCPY(JA,JT,offB,0,(coo.nnz));
7023 if(wp)
7024 {
7025 RSB_A_MEMCPY_parallel( VA,VT,offB,0,(coo.nnz),es);
7026 }
7027 else
7028 {
7029 RSB_A_MEMCPY( VA,VT,offB,0,(coo.nnz),es);
7030 }
7031 RSB_ASSERT(rsb__util_is_coo_array_sorted_up_partial_order(IA,coo.nnz));
7032 goto done;
7033 err:
7034 errval = RSB_ERR_ENOMEM;
7035 done:
7036 if( coop && coop->nnz)
7037 ;
7038 else
7039 rsb__destroy_coo_matrix_t(&coo);
7040 RSB_ASSIGN_IF(onzp,onz);
7041 ret:
7042 return errval;
7043 }
7044
rsb__do_copy_converted_scaled(const void * RSB_RESTRICT src,void * RSB_RESTRICT dst,const void * RSB_RESTRICT alphap,rsb_type_t stype,rsb_type_t dtype,size_t nnz,rsb_trans_t transA)7045 rsb_err_t rsb__do_copy_converted_scaled(const void *RSB_RESTRICT src, void *RSB_RESTRICT dst, const void *RSB_RESTRICT alphap, rsb_type_t stype,rsb_type_t dtype, size_t nnz, rsb_trans_t transA)
7046 {
7047 /*!
7048 * Copies scaled and conj-transposed.
7049 * alpha according to src code type.
7050 * \return \rsberrcodemsg
7051 * */
7052 rsb_nnz_idx_t nzi;
7053
7054 if((!dst) || (!src))
7055 return RSB_ERR_BADARGS;
7056
7057 if( stype == RSB_NUMERICAL_TYPE_DOUBLE && dtype == RSB_NUMERICAL_TYPE_DOUBLE )
7058 {
7059 const double alpha = alphap?*(double*)alphap:((double)(1.0));
7060 const double*tsrc = src;
7061 double*tdst = dst;
7062 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double)(alpha*tsrc[nzi]);
7063 }
7064 else
7065 if( stype == RSB_NUMERICAL_TYPE_DOUBLE && dtype == RSB_NUMERICAL_TYPE_FLOAT )
7066 {
7067 const double alpha = alphap?*(double*)alphap:((double)(1.0));
7068 const double*tsrc = src;
7069 float*tdst = dst;
7070 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float)(alpha*tsrc[nzi]);
7071 }
7072 else
7073 if( stype == RSB_NUMERICAL_TYPE_DOUBLE && dtype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7074 {
7075 const double alpha = alphap?*(double*)alphap:((double)(1.0));
7076 const double*tsrc = src;
7077 float complex*tdst = dst;
7078 if(RSB_DOES_CONJUGATE(transA))
7079 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*conjf(tsrc[nzi])) + 0*I;
7080 else
7081 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*tsrc[nzi]) + 0*I;
7082 }
7083 else
7084 if( stype == RSB_NUMERICAL_TYPE_DOUBLE && dtype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7085 {
7086 const double alpha = alphap?*(double*)alphap:((double)(1.0));
7087 const double*tsrc = src;
7088 double complex*tdst = dst;
7089 if(RSB_DOES_CONJUGATE(transA))
7090 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*conj(tsrc[nzi])) + 0*I;
7091 else
7092 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*tsrc[nzi]) + 0*I;
7093 }
7094 else
7095 if( stype == RSB_NUMERICAL_TYPE_FLOAT && dtype == RSB_NUMERICAL_TYPE_DOUBLE )
7096 {
7097 const float alpha = alphap?*(float*)alphap:((float)(1.0));
7098 const float*tsrc = src;
7099 double*tdst = dst;
7100 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double)(alpha*tsrc[nzi]);
7101 }
7102 else
7103 if( stype == RSB_NUMERICAL_TYPE_FLOAT && dtype == RSB_NUMERICAL_TYPE_FLOAT )
7104 {
7105 const float alpha = alphap?*(float*)alphap:((float)(1.0));
7106 const float*tsrc = src;
7107 float*tdst = dst;
7108 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float)(alpha*tsrc[nzi]);
7109 }
7110 else
7111 if( stype == RSB_NUMERICAL_TYPE_FLOAT && dtype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7112 {
7113 const float alpha = alphap?*(float*)alphap:((float)(1.0));
7114 const float*tsrc = src;
7115 float complex*tdst = dst;
7116 if(RSB_DOES_CONJUGATE(transA))
7117 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*conjf(tsrc[nzi])) + 0*I;
7118 else
7119 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*tsrc[nzi]) + 0*I;
7120 }
7121 else
7122 if( stype == RSB_NUMERICAL_TYPE_FLOAT && dtype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7123 {
7124 const float alpha = alphap?*(float*)alphap:((float)(1.0));
7125 const float*tsrc = src;
7126 double complex*tdst = dst;
7127 if(RSB_DOES_CONJUGATE(transA))
7128 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*conj(tsrc[nzi])) + 0*I;
7129 else
7130 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*tsrc[nzi]) + 0*I;
7131 }
7132 else
7133 if( stype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX && dtype == RSB_NUMERICAL_TYPE_DOUBLE )
7134 {
7135 const float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
7136 const float complex*tsrc = src;
7137 double*tdst = dst;
7138 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = crealf((double)(alpha*tsrc[nzi]));
7139 }
7140 else
7141 if( stype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX && dtype == RSB_NUMERICAL_TYPE_FLOAT )
7142 {
7143 const float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
7144 const float complex*tsrc = src;
7145 float*tdst = dst;
7146 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = crealf((float)(alpha*tsrc[nzi]));
7147 }
7148 else
7149 if( stype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX && dtype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7150 {
7151 const float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
7152 const float complex*tsrc = src;
7153 float complex*tdst = dst;
7154 if(RSB_DOES_CONJUGATE(transA))
7155 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*conjf(tsrc[nzi]));
7156 else
7157 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*tsrc[nzi]);
7158 }
7159 else
7160 if( stype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX && dtype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7161 {
7162 const float complex alpha = alphap?*(float complex*)alphap:((float complex)(1.0));
7163 const float complex*tsrc = src;
7164 double complex*tdst = dst;
7165 if(RSB_DOES_CONJUGATE(transA))
7166 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*conj(tsrc[nzi]));
7167 else
7168 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*tsrc[nzi]);
7169 }
7170 else
7171 if( stype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX && dtype == RSB_NUMERICAL_TYPE_DOUBLE )
7172 {
7173 const double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
7174 const double complex*tsrc = src;
7175 double*tdst = dst;
7176 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = creal((double)(alpha*tsrc[nzi]));
7177 }
7178 else
7179 if( stype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX && dtype == RSB_NUMERICAL_TYPE_FLOAT )
7180 {
7181 const double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
7182 const double complex*tsrc = src;
7183 float*tdst = dst;
7184 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = creal((float)(alpha*tsrc[nzi]));
7185 }
7186 else
7187 if( stype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX && dtype == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7188 {
7189 const double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
7190 const double complex*tsrc = src;
7191 float complex*tdst = dst;
7192 if(RSB_DOES_CONJUGATE(transA))
7193 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*conjf(tsrc[nzi]));
7194 else
7195 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (float complex)(alpha*tsrc[nzi]);
7196 }
7197 else
7198 if( stype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX && dtype == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7199 {
7200 const double complex alpha = alphap?*(double complex*)alphap:((double complex)(1.0));
7201 const double complex*tsrc = src;
7202 double complex*tdst = dst;
7203 if(RSB_DOES_CONJUGATE(transA))
7204 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*conj(tsrc[nzi]));
7205 else
7206 for(nzi=0;nzi<nnz;++nzi) tdst[nzi] = (double complex)(alpha*tsrc[nzi]);
7207 }
7208 else
7209 return RSB_ERR_UNSUPPORTED_TYPE ;
7210 return RSB_ERR_NO_ERROR;
7211 }
7212
rsb_util_csc2csr(const void * RSB_RESTRICT VA,const rsb_coo_idx_t * RSB_RESTRICT IA,const rsb_coo_idx_t * RSB_RESTRICT JA,void * RSB_RESTRICT oVA,rsb_coo_idx_t * RSB_RESTRICT oIA,rsb_coo_idx_t * RSB_RESTRICT oJA,rsb_coo_idx_t m,rsb_coo_idx_t k,rsb_nnz_idx_t nnz,rsb_type_t typecode,const rsb_coo_idx_t offi,const rsb_coo_idx_t offo,rsb_flags_t * flagsp)7213 rsb_err_t rsb_util_csc2csr(const void *RSB_RESTRICT VA, const rsb_coo_idx_t * RSB_RESTRICT IA, const rsb_coo_idx_t * RSB_RESTRICT JA, void *RSB_RESTRICT oVA, rsb_coo_idx_t * RSB_RESTRICT oIA, rsb_coo_idx_t * RSB_RESTRICT oJA, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_nnz_idx_t nnz, rsb_type_t typecode, const rsb_coo_idx_t offi, const rsb_coo_idx_t offo, rsb_flags_t*flagsp)
7214 {
7215 /*!
7216 * */
7217 rsb_nnz_idx_t nzi = 0, nzo;
7218 rsb_coo_idx_t nr, nc;
7219 rsb_flags_t flags = RSB_FLAG_NOFLAGS;
7220 rsb_bool_t islowtri = RSB_BOOL_TRUE, isupptri = RSB_BOOL_TRUE;
7221 rsb_nnz_idx_t lowtrin = 0, upptrin = 0;
7222
7223 RSB_BZERO(oIA, sizeof(*oIA)*(m+1));
7224 oIA[0] = offo;
7225 for(nzi=0;nzi<nnz;++nzi)
7226 oIA[IA[nzi]-offi+1]++;
7227 for(nr=0;nr<m;++nr)
7228 oIA[nr+1]+=oIA[nr];
7229 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
7230 for(nc=0;nc<k;++nc)
7231 for(nzi = JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7232 {
7233 nzo = oIA[IA[nzi]-offi]++;
7234 oJA[nzo] = nc+offo;
7235 ((double*)oVA)[nzo] = ((const double*)VA)[nzi];
7236 }
7237 else
7238 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
7239 for(nc=0;nc<k;++nc)
7240 for(nzi = JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7241 {
7242 nzo = oIA[IA[nzi]-offi]++;
7243 oJA[nzo] = nc+offo;
7244 ((float*)oVA)[nzo] = ((const float*)VA)[nzi];
7245 }
7246 else
7247 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7248 for(nc=0;nc<k;++nc)
7249 for(nzi = JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7250 {
7251 nzo = oIA[IA[nzi]-offi]++;
7252 oJA[nzo] = nc+offo;
7253 ((float complex*)oVA)[nzo] = ((const float complex*)VA)[nzi];
7254 }
7255 else
7256 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7257 for(nc=0;nc<k;++nc)
7258 for(nzi = JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7259 {
7260 nzo = oIA[IA[nzi]-offi]++;
7261 oJA[nzo] = nc+offo;
7262 ((double complex*)oVA)[nzo] = ((const double complex*)VA)[nzi];
7263 }
7264 else
7265 return RSB_ERR_UNSUPPORTED_TYPE ;
7266 for(nc=0;nc<k;++nc)
7267 for(nzi=JA[nc]-offi;nzi<JA[nc+1]-offi;++nzi)
7268 {
7269 oIA[IA[nzi]-offi]--;
7270 if(IA[nzi]-offi>nc)
7271 lowtrin++;
7272 else
7273 if(IA[nzi]-offi<nc)
7274 upptrin++;
7275 }
7276 if(upptrin)
7277 islowtri = RSB_BOOL_FALSE;
7278 if(lowtrin)
7279 isupptri = RSB_BOOL_FALSE;
7280 if(isupptri)
7281 RSB_DO_FLAG_ADD(flags,RSB_FLAG_UPPER);
7282 if(islowtri)
7283 RSB_DO_FLAG_ADD(flags,RSB_FLAG_LOWER);
7284 if( RSB_XOR(upptrin,lowtrin) )
7285 RSB_DO_FLAG_ADD(flags,RSB_FLAG_TRIANGULAR);
7286 if(*flagsp) RSB_DO_FLAG_ADD(*flagsp,flags);
7287 return RSB_ERR_NO_ERROR;
7288 }
7289
rsb_util_coo_copy_and_stats(const void * RSB_RESTRICT VA,const rsb_coo_idx_t * RSB_RESTRICT IA,const rsb_coo_idx_t * RSB_RESTRICT JA,void * RSB_RESTRICT oVA,rsb_coo_idx_t * RSB_RESTRICT oIA,rsb_coo_idx_t * RSB_RESTRICT oJA,rsb_coo_idx_t * m,rsb_coo_idx_t * k,const rsb_nnz_idx_t nnz,const rsb_type_t typecode,const rsb_coo_idx_t offi,const rsb_coo_idx_t offo,rsb_flags_t iflags,rsb_flags_t * flagsp)7290 rsb_err_t rsb_util_coo_copy_and_stats(const void *RSB_RESTRICT VA, const rsb_coo_idx_t * RSB_RESTRICT IA, const rsb_coo_idx_t * RSB_RESTRICT JA, void *RSB_RESTRICT oVA, rsb_coo_idx_t * RSB_RESTRICT oIA, rsb_coo_idx_t * RSB_RESTRICT oJA, rsb_coo_idx_t*m, rsb_coo_idx_t*k, const rsb_nnz_idx_t nnz, const rsb_type_t typecode, const rsb_coo_idx_t offi, const rsb_coo_idx_t offo, rsb_flags_t iflags, rsb_flags_t*flagsp)
7291 {
7292 /*!
7293 * FIXME: unfinished! shall support also typecode-based zeros removal
7294 * */
7295 rsb_nnz_idx_t nzi = 0;
7296 rsb_coo_idx_t maxi = 0,maxj = 0;
7297 rsb_bool_t islowtri = RSB_BOOL_TRUE,isupptri = RSB_BOOL_TRUE;
7298 rsb_flags_t flags = RSB_FLAG_NOFLAGS;
7299 rsb_nnz_idx_t lowtrin = 0,upptrin = 0;
7300
7301 if(nnz<1)
7302 goto done;
7303
7304 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
7305 {
7306 rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7307 maxi = i, maxj = j;
7308 ((double*)oVA)[nzi] = ((double*)VA)[nzi];
7309 oIA[nzi] = i-offi+offo;
7310 oJA[nzi] = j-offi+offo;
7311 lowtrin |= (i>j), upptrin |= (i<j);
7312 for(nzi=1;RSB_LIKELY(nzi<nnz);++nzi)
7313 {
7314 rsb_coo_idx_t i = IA[nzi],j = JA[nzi];
7315 maxi = RSB_MAX(maxi, i);
7316 maxj = RSB_MAX(maxj, j);
7317 ((double*)oVA)[nzi] = ((double*)VA)[nzi];
7318 oIA[nzi] = i-offi+offo;
7319 oJA[nzi] = j-offi+offo;
7320 lowtrin |= (i>j);
7321 upptrin |= (i<j);
7322 }
7323 }
7324 else
7325 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
7326 {
7327 rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7328 maxi = i, maxj = j;
7329 ((float*)oVA)[nzi] = ((float*)VA)[nzi];
7330 oIA[nzi] = i-offi+offo;
7331 oJA[nzi] = j-offi+offo;
7332 lowtrin |= (i>j), upptrin |= (i<j);
7333 for(nzi=1;RSB_LIKELY(nzi<nnz);++nzi)
7334 {
7335 rsb_coo_idx_t i = IA[nzi],j = JA[nzi];
7336 maxi = RSB_MAX(maxi, i);
7337 maxj = RSB_MAX(maxj, j);
7338 ((float*)oVA)[nzi] = ((float*)VA)[nzi];
7339 oIA[nzi] = i-offi+offo;
7340 oJA[nzi] = j-offi+offo;
7341 lowtrin |= (i>j);
7342 upptrin |= (i<j);
7343 }
7344 }
7345 else
7346 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7347 {
7348 rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7349 maxi = i, maxj = j;
7350 ((float complex*)oVA)[nzi] = ((float complex*)VA)[nzi];
7351 oIA[nzi] = i-offi+offo;
7352 oJA[nzi] = j-offi+offo;
7353 lowtrin |= (i>j), upptrin |= (i<j);
7354 for(nzi=1;RSB_LIKELY(nzi<nnz);++nzi)
7355 {
7356 rsb_coo_idx_t i = IA[nzi],j = JA[nzi];
7357 maxi = RSB_MAX(maxi, i);
7358 maxj = RSB_MAX(maxj, j);
7359 ((float complex*)oVA)[nzi] = ((float complex*)VA)[nzi];
7360 oIA[nzi] = i-offi+offo;
7361 oJA[nzi] = j-offi+offo;
7362 lowtrin |= (i>j);
7363 upptrin |= (i<j);
7364 }
7365 }
7366 else
7367 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7368 {
7369 rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7370 maxi = i, maxj = j;
7371 ((double complex*)oVA)[nzi] = ((double complex*)VA)[nzi];
7372 oIA[nzi] = i-offi+offo;
7373 oJA[nzi] = j-offi+offo;
7374 lowtrin |= (i>j), upptrin |= (i<j);
7375 for(nzi=1;RSB_LIKELY(nzi<nnz);++nzi)
7376 {
7377 rsb_coo_idx_t i = IA[nzi],j = JA[nzi];
7378 maxi = RSB_MAX(maxi, i);
7379 maxj = RSB_MAX(maxj, j);
7380 ((double complex*)oVA)[nzi] = ((double complex*)VA)[nzi];
7381 oIA[nzi] = i-offi+offo;
7382 oJA[nzi] = j-offi+offo;
7383 lowtrin |= (i>j);
7384 upptrin |= (i<j);
7385 }
7386 }
7387 else
7388 return RSB_ERR_UNSUPPORTED_TYPE;
7389 if(upptrin)
7390 islowtri = RSB_BOOL_FALSE;
7391 if(lowtrin)
7392 isupptri = RSB_BOOL_FALSE;
7393 if(isupptri)
7394 RSB_DO_FLAG_ADD(flags,RSB_FLAG_UPPER);
7395 if(islowtri)
7396 RSB_DO_FLAG_ADD(flags,RSB_FLAG_LOWER);
7397 if( RSB_XOR(upptrin,lowtrin) )
7398 RSB_DO_FLAG_ADD(flags,RSB_FLAG_TRIANGULAR);
7399 if(flagsp) RSB_DO_FLAG_ADD(*flagsp,flags);
7400 if(m) *m = maxi+1;
7401 if(k) *k = maxj+1;
7402 done:
7403 return RSB_ERR_NO_ERROR;
7404 }
7405
rsb_util_coo_copy(const void * RSB_RESTRICT VA,const rsb_coo_idx_t * RSB_RESTRICT IA,const rsb_coo_idx_t * RSB_RESTRICT JA,void * RSB_RESTRICT oVA,rsb_coo_idx_t * RSB_RESTRICT oIA,rsb_coo_idx_t * RSB_RESTRICT oJA,const rsb_nnz_idx_t nnz,const rsb_type_t typecode,const rsb_coo_idx_t offi,const rsb_coo_idx_t offo)7406 rsb_err_t rsb_util_coo_copy(const void *RSB_RESTRICT VA, const rsb_coo_idx_t * RSB_RESTRICT IA, const rsb_coo_idx_t * RSB_RESTRICT JA, void *RSB_RESTRICT oVA, rsb_coo_idx_t * RSB_RESTRICT oIA, rsb_coo_idx_t * RSB_RESTRICT oJA, const rsb_nnz_idx_t nnz, const rsb_type_t typecode, const rsb_coo_idx_t offi, const rsb_coo_idx_t offo)
7407 {
7408 /*!
7409 * FIXME: unfinished! shall support also typecode-based zeros removal
7410 * */
7411 rsb_nnz_idx_t nzi = 0;
7412
7413 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
7414 {
7415 for(nzi=0;RSB_LIKELY(nzi<nnz);++nzi)
7416 {
7417 rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7418 ((double*)oVA)[nzi] = ((double*)VA)[nzi];
7419 oIA[nzi] = i-offi+offo;
7420 oJA[nzi] = j-offi+offo;
7421 }
7422 }
7423 else
7424 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
7425 {
7426 for(nzi=0;RSB_LIKELY(nzi<nnz);++nzi)
7427 {
7428 rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7429 ((float*)oVA)[nzi] = ((float*)VA)[nzi];
7430 oIA[nzi] = i-offi+offo;
7431 oJA[nzi] = j-offi+offo;
7432 }
7433 }
7434 else
7435 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7436 {
7437 for(nzi=0;RSB_LIKELY(nzi<nnz);++nzi)
7438 {
7439 rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7440 ((float complex*)oVA)[nzi] = ((float complex*)VA)[nzi];
7441 oIA[nzi] = i-offi+offo;
7442 oJA[nzi] = j-offi+offo;
7443 }
7444 }
7445 else
7446 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7447 {
7448 for(nzi=0;RSB_LIKELY(nzi<nnz);++nzi)
7449 {
7450 rsb_coo_idx_t i = IA[nzi], j = JA[nzi];
7451 ((double complex*)oVA)[nzi] = ((double complex*)VA)[nzi];
7452 oIA[nzi] = i-offi+offo;
7453 oJA[nzi] = j-offi+offo;
7454 }
7455 }
7456 else
7457 return RSB_ERR_UNSUPPORTED_TYPE ;
7458 return RSB_ERR_NO_ERROR;
7459 }
7460
7461 /* sparse blas level 1 equivalent functions */
7462
rsb__BLAS_Xusdot(const rsb_type_t typecode,const enum blas_conj_type conj_arg,const rsb_blas_int_t nz,const void * x,const rsb_blas_int_t * indx,const void * y,const rsb_blas_int_t incy,void * r,const enum blas_base_type index_base)7463 int rsb__BLAS_Xusdot(const rsb_type_t typecode, const enum blas_conj_type conj_arg, const rsb_blas_int_t nz, const void*x, const rsb_blas_int_t*indx, const void*y, const rsb_blas_int_t incy, void*r, const enum blas_base_type index_base)
7464 {
7465 /*!
7466 \rsb_spblasl1_dot_msg
7467 \rsb_warn_untested_msg
7468 */
7469 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
7470 {
7471 double*xa = (double*)x;
7472 double*ya = (double*)y;
7473 double*rp = (double*)r;
7474 double ac = ((double)(0));
7475 rsb_blas_int_t nzi, xi;
7476 if( index_base == blas_one_base )
7477 ya-=incy;
7478 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7479 {
7480 xi = indx[nzi];
7481 ac += xa[nzi] * ya[xi*incy];
7482 }
7483 RSB_SET_IF_NOT_NULL(rp,ac);
7484 }
7485 else
7486 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
7487 {
7488 float*xa = (float*)x;
7489 float*ya = (float*)y;
7490 float*rp = (float*)r;
7491 float ac = ((float)(0));
7492 rsb_blas_int_t nzi, xi;
7493 if( index_base == blas_one_base )
7494 ya-=incy;
7495 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7496 {
7497 xi = indx[nzi];
7498 ac += xa[nzi] * ya[xi*incy];
7499 }
7500 RSB_SET_IF_NOT_NULL(rp,ac);
7501 }
7502 else
7503 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7504 {
7505 float complex*xa = (float complex*)x;
7506 float complex*ya = (float complex*)y;
7507 float complex*rp = (float complex*)r;
7508 float complex ac = ((float complex)(0));
7509 rsb_blas_int_t nzi, xi;
7510 if( index_base == blas_one_base )
7511 ya-=incy;
7512 if( conj_arg == blas_conj )
7513 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7514 {
7515 xi = indx[nzi];
7516 ac += conjf(xa[nzi]) * ya[xi*incy];
7517 }
7518 else
7519 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7520 {
7521 xi = indx[nzi];
7522 ac += xa[nzi] * ya[xi*incy];
7523 }
7524 RSB_SET_IF_NOT_NULL(rp,ac);
7525 }
7526 else
7527 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7528 {
7529 double complex*xa = (double complex*)x;
7530 double complex*ya = (double complex*)y;
7531 double complex*rp = (double complex*)r;
7532 double complex ac = ((double complex)(0));
7533 rsb_blas_int_t nzi, xi;
7534 if( index_base == blas_one_base )
7535 ya-=incy;
7536 if( conj_arg == blas_conj )
7537 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7538 {
7539 xi = indx[nzi];
7540 ac += conj(xa[nzi]) * ya[xi*incy];
7541 }
7542 else
7543 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7544 {
7545 xi = indx[nzi];
7546 ac += xa[nzi] * ya[xi*incy];
7547 }
7548 RSB_SET_IF_NOT_NULL(rp,ac);
7549 }
7550 else
7551 return RSB_ERR_UNSUPPORTED_TYPE ;
7552 return RSB_ERR_NO_ERROR;
7553 }
7554
rsb__BLAS_Xusaxpy(const rsb_type_t typecode,const rsb_blas_int_t nz,const void * alpha,const void * x,const rsb_blas_int_t * indx,const void * y,const rsb_blas_int_t incy,const enum blas_base_type index_base)7555 int rsb__BLAS_Xusaxpy(const rsb_type_t typecode, const rsb_blas_int_t nz, const void*alpha, const void*x, const rsb_blas_int_t*indx, const void*y, const rsb_blas_int_t incy, const enum blas_base_type index_base)
7556 {
7557 /*!
7558 \rsb_spblasl1_axpy_msg
7559 \rsb_warn_untested_msg
7560 */
7561 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
7562 {
7563 const double*xa = (const double*)x;
7564 double*ya = (double*)y;
7565 const double alphav = *(double*)alpha;
7566 rsb_blas_int_t nzi, xi;
7567 if( index_base == blas_one_base )
7568 ya-=incy;
7569 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7570 {
7571 xi = indx[nzi];
7572 ya[nzi*incy] += alphav*xa[xi];
7573 }
7574 }
7575 else
7576 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
7577 {
7578 const float*xa = (const float*)x;
7579 float*ya = (float*)y;
7580 const float alphav = *(float*)alpha;
7581 rsb_blas_int_t nzi, xi;
7582 if( index_base == blas_one_base )
7583 ya-=incy;
7584 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7585 {
7586 xi = indx[nzi];
7587 ya[nzi*incy] += alphav*xa[xi];
7588 }
7589 }
7590 else
7591 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7592 {
7593 const float complex*xa = (const float complex*)x;
7594 float complex*ya = (float complex*)y;
7595 const float complex alphav = *(float complex*)alpha;
7596 rsb_blas_int_t nzi, xi;
7597 if( index_base == blas_one_base )
7598 ya-=incy;
7599 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7600 {
7601 xi = indx[nzi];
7602 ya[nzi*incy] += alphav*xa[xi];
7603 }
7604 }
7605 else
7606 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7607 {
7608 const double complex*xa = (const double complex*)x;
7609 double complex*ya = (double complex*)y;
7610 const double complex alphav = *(double complex*)alpha;
7611 rsb_blas_int_t nzi, xi;
7612 if( index_base == blas_one_base )
7613 ya-=incy;
7614 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7615 {
7616 xi = indx[nzi];
7617 ya[nzi*incy] += alphav*xa[xi];
7618 }
7619 }
7620 else
7621 return RSB_ERR_UNSUPPORTED_TYPE ;
7622 return RSB_ERR_NO_ERROR;
7623 }
7624
rsb__BLAS_Xusga(const rsb_type_t typecode,const rsb_blas_int_t nz,const void * y,const rsb_blas_int_t incy,void * x,const rsb_blas_int_t * indx,const enum blas_base_type index_base)7625 int rsb__BLAS_Xusga(const rsb_type_t typecode, const rsb_blas_int_t nz, const void*y, const rsb_blas_int_t incy, void*x, const rsb_blas_int_t*indx, const enum blas_base_type index_base)
7626 {
7627 /*!
7628 \rsb_spblasl1_ga_msg
7629 \rsb_warn_untested_msg
7630 */
7631 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
7632 {
7633 double*xa = (double*)x;
7634 const double*ya = (const double*)y;
7635 rsb_blas_int_t nzi,xi;
7636 if( index_base == blas_one_base )
7637 ya-=incy;
7638 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7639 {
7640 xi = indx[nzi];
7641 xa[nzi] = ya[xi*incy];
7642 }
7643 }
7644 else
7645 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
7646 {
7647 float*xa = (float*)x;
7648 const float*ya = (const float*)y;
7649 rsb_blas_int_t nzi,xi;
7650 if( index_base == blas_one_base )
7651 ya-=incy;
7652 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7653 {
7654 xi = indx[nzi];
7655 xa[nzi] = ya[xi*incy];
7656 }
7657 }
7658 else
7659 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7660 {
7661 float complex*xa = (float complex*)x;
7662 const float complex*ya = (const float complex*)y;
7663 rsb_blas_int_t nzi,xi;
7664 if( index_base == blas_one_base )
7665 ya-=incy;
7666 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7667 {
7668 xi = indx[nzi];
7669 xa[nzi] = ya[xi*incy];
7670 }
7671 }
7672 else
7673 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7674 {
7675 double complex*xa = (double complex*)x;
7676 const double complex*ya = (const double complex*)y;
7677 rsb_blas_int_t nzi,xi;
7678 if( index_base == blas_one_base )
7679 ya-=incy;
7680 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7681 {
7682 xi = indx[nzi];
7683 xa[nzi] = ya[xi*incy];
7684 }
7685 }
7686 else
7687 return RSB_ERR_UNSUPPORTED_TYPE ;
7688 return RSB_ERR_NO_ERROR;
7689 }
7690
rsb__BLAS_Xusgz(const rsb_type_t typecode,const rsb_blas_int_t nz,void * y,const rsb_blas_int_t incy,void * x,const rsb_blas_int_t * indx,const enum blas_base_type index_base)7691 int rsb__BLAS_Xusgz(const rsb_type_t typecode, const rsb_blas_int_t nz, void*y, const rsb_blas_int_t incy, void*x, const rsb_blas_int_t*indx, const enum blas_base_type index_base)
7692 {
7693 /*!
7694 \rsb_spblasl1_gz_msg
7695 \rsb_warn_untested_msg
7696 */
7697 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
7698 {
7699 double*xa = (double*)x;
7700 double*ya = (double*)y;
7701 rsb_blas_int_t nzi,xi;
7702 if( index_base == blas_one_base )
7703 ya-=incy;
7704 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7705 {
7706 xi = indx[nzi];
7707 xa[nzi] = ya[xi*incy];
7708 ya[xi*incy] = ((double)(0));
7709 }
7710 }
7711 else
7712 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
7713 {
7714 float*xa = (float*)x;
7715 float*ya = (float*)y;
7716 rsb_blas_int_t nzi,xi;
7717 if( index_base == blas_one_base )
7718 ya-=incy;
7719 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7720 {
7721 xi = indx[nzi];
7722 xa[nzi] = ya[xi*incy];
7723 ya[xi*incy] = ((float)(0));
7724 }
7725 }
7726 else
7727 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7728 {
7729 float complex*xa = (float complex*)x;
7730 float complex*ya = (float complex*)y;
7731 rsb_blas_int_t nzi,xi;
7732 if( index_base == blas_one_base )
7733 ya-=incy;
7734 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7735 {
7736 xi = indx[nzi];
7737 xa[nzi] = ya[xi*incy];
7738 ya[xi*incy] = ((float complex)(0));
7739 }
7740 }
7741 else
7742 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7743 {
7744 double complex*xa = (double complex*)x;
7745 double complex*ya = (double complex*)y;
7746 rsb_blas_int_t nzi,xi;
7747 if( index_base == blas_one_base )
7748 ya-=incy;
7749 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7750 {
7751 xi = indx[nzi];
7752 xa[nzi] = ya[xi*incy];
7753 ya[xi*incy] = ((double complex)(0));
7754 }
7755 }
7756 else
7757 return RSB_ERR_UNSUPPORTED_TYPE ;
7758 return RSB_ERR_NO_ERROR;
7759 }
7760
rsb__BLAS_Xussc(const rsb_type_t typecode,const rsb_blas_int_t nz,const void * x,void * y,const rsb_blas_int_t incy,const rsb_blas_int_t * indx,const enum blas_base_type index_base)7761 int rsb__BLAS_Xussc(const rsb_type_t typecode, const rsb_blas_int_t nz, const void*x, void*y, const rsb_blas_int_t incy, const rsb_blas_int_t*indx, const enum blas_base_type index_base)
7762 {
7763 /*!
7764 \rsb_spblasl1_sc_msg
7765 \rsb_warn_untested_msg
7766 */
7767 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
7768 {
7769 const double*xa = (const double*)x;
7770 double*ya = (double*)y;
7771 rsb_blas_int_t nzi,xi;
7772 if( index_base == blas_one_base )
7773 ya-=incy;
7774 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7775 {
7776 xi = indx[nzi];
7777 ya[xi*incy] = xa[nzi];
7778 }
7779 }
7780 else
7781 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
7782 {
7783 const float*xa = (const float*)x;
7784 float*ya = (float*)y;
7785 rsb_blas_int_t nzi,xi;
7786 if( index_base == blas_one_base )
7787 ya-=incy;
7788 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7789 {
7790 xi = indx[nzi];
7791 ya[xi*incy] = xa[nzi];
7792 }
7793 }
7794 else
7795 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7796 {
7797 const float complex*xa = (const float complex*)x;
7798 float complex*ya = (float complex*)y;
7799 rsb_blas_int_t nzi,xi;
7800 if( index_base == blas_one_base )
7801 ya-=incy;
7802 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7803 {
7804 xi = indx[nzi];
7805 ya[xi*incy] = xa[nzi];
7806 }
7807 }
7808 else
7809 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7810 {
7811 const double complex*xa = (const double complex*)x;
7812 double complex*ya = (double complex*)y;
7813 rsb_blas_int_t nzi,xi;
7814 if( index_base == blas_one_base )
7815 ya-=incy;
7816 for(nzi=0;RSB_LIKELY(nzi<nz);++nzi)
7817 {
7818 xi = indx[nzi];
7819 ya[xi*incy] = xa[nzi];
7820 }
7821 }
7822 else
7823 return RSB_ERR_UNSUPPORTED_TYPE ;
7824 return RSB_ERR_NO_ERROR;
7825 }
7826
7827 /* blas level 1 equivalent functions */
7828
rsb__cblas_Xcopy(rsb_type_t typecode,rsb_nnz_idx_t n,const void * x,rsb_nnz_idx_t incx,void * y,rsb_nnz_idx_t incy)7829 rsb_err_t rsb__cblas_Xcopy(rsb_type_t typecode, rsb_nnz_idx_t n, const void * x, rsb_nnz_idx_t incx, void * y, rsb_nnz_idx_t incy)
7830 {
7831 return rsb__xcopy_strided_typed(y,x,0,0,n,typecode,incy,incx);
7832 }
7833
rsb__cblas_Xnrm2(rsb_type_t type,size_t n,const void * a,rsb_nnz_idx_t incA,void * c)7834 rsb_err_t rsb__cblas_Xnrm2(rsb_type_t type, size_t n, const void * a, rsb_nnz_idx_t incA, void * c){
7835 /*!
7836 * c <- sqrt(sum(|a_i|^2))
7837 *
7838 * \param a an array pointer
7839 * \param type a valid type code
7840 * \param n the input array length
7841 * \note see dznrm2 in BLAS
7842 *
7843 * \return \rsberrcodemsg
7844 * */
7845 size_t i;
7846 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
7847 if( type == RSB_NUMERICAL_TYPE_DOUBLE )
7848 {
7849 const double*ta = a;double *tc = c,acc = ((double)(0)),tmp = ((double)(0));
7850 {
7851 for(i=0;i+15<n;i+=16){
7852 acc = fabs(ta[(i+0 )*incA]);tmp += acc*acc;
7853 acc = fabs(ta[(i+1 )*incA]);tmp += acc*acc;
7854 acc = fabs(ta[(i+2 )*incA]);tmp += acc*acc;
7855 acc = fabs(ta[(i+3 )*incA]);tmp += acc*acc;
7856 acc = fabs(ta[(i+4 )*incA]);tmp += acc*acc;
7857 acc = fabs(ta[(i+5 )*incA]);tmp += acc*acc;
7858 acc = fabs(ta[(i+6 )*incA]);tmp += acc*acc;
7859 acc = fabs(ta[(i+7 )*incA]);tmp += acc*acc;
7860 acc = fabs(ta[(i+8 )*incA]);tmp += acc*acc;
7861 acc = fabs(ta[(i+9 )*incA]);tmp += acc*acc;
7862 acc = fabs(ta[(i+10 )*incA]);tmp += acc*acc;
7863 acc = fabs(ta[(i+11 )*incA]);tmp += acc*acc;
7864 acc = fabs(ta[(i+12 )*incA]);tmp += acc*acc;
7865 acc = fabs(ta[(i+13 )*incA]);tmp += acc*acc;
7866 acc = fabs(ta[(i+14 )*incA]);tmp += acc*acc;
7867 acc = fabs(ta[(i+15 )*incA]);tmp += acc*acc;
7868 }
7869 for( ;i<n;++i){ acc = fabs(ta[(i+0 )*incA]);tmp += acc*acc;
7870 }
7871 }
7872 ;
7873 tc[0] = (sqrt(tmp));
7874 }
7875 else
7876 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
7877 #ifdef RSB_NUMERICAL_TYPE_FLOAT
7878 if( type == RSB_NUMERICAL_TYPE_FLOAT )
7879 {
7880 const float*ta = a;float *tc = c,acc = ((float)(0)),tmp = ((float)(0));
7881 {
7882 for(i=0;i+15<n;i+=16){
7883 acc = fabsf(ta[(i+0 )*incA]);tmp += acc*acc;
7884 acc = fabsf(ta[(i+1 )*incA]);tmp += acc*acc;
7885 acc = fabsf(ta[(i+2 )*incA]);tmp += acc*acc;
7886 acc = fabsf(ta[(i+3 )*incA]);tmp += acc*acc;
7887 acc = fabsf(ta[(i+4 )*incA]);tmp += acc*acc;
7888 acc = fabsf(ta[(i+5 )*incA]);tmp += acc*acc;
7889 acc = fabsf(ta[(i+6 )*incA]);tmp += acc*acc;
7890 acc = fabsf(ta[(i+7 )*incA]);tmp += acc*acc;
7891 acc = fabsf(ta[(i+8 )*incA]);tmp += acc*acc;
7892 acc = fabsf(ta[(i+9 )*incA]);tmp += acc*acc;
7893 acc = fabsf(ta[(i+10 )*incA]);tmp += acc*acc;
7894 acc = fabsf(ta[(i+11 )*incA]);tmp += acc*acc;
7895 acc = fabsf(ta[(i+12 )*incA]);tmp += acc*acc;
7896 acc = fabsf(ta[(i+13 )*incA]);tmp += acc*acc;
7897 acc = fabsf(ta[(i+14 )*incA]);tmp += acc*acc;
7898 acc = fabsf(ta[(i+15 )*incA]);tmp += acc*acc;
7899 }
7900 for( ;i<n;++i){ acc = fabsf(ta[(i+0 )*incA]);tmp += acc*acc;
7901 }
7902 }
7903 ;
7904 tc[0] = (sqrtf(tmp));
7905 }
7906 else
7907 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
7908 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
7909 if( type == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
7910 {
7911 const float complex*ta = a;float *tc = c,acc = ((float complex)(0)),tmp = ((float complex)(0));
7912 {
7913 for(i=0;i+15<n;i+=16){
7914 acc = cabsf(ta[(i+0 )*incA]);tmp += acc*acc;
7915 acc = cabsf(ta[(i+1 )*incA]);tmp += acc*acc;
7916 acc = cabsf(ta[(i+2 )*incA]);tmp += acc*acc;
7917 acc = cabsf(ta[(i+3 )*incA]);tmp += acc*acc;
7918 acc = cabsf(ta[(i+4 )*incA]);tmp += acc*acc;
7919 acc = cabsf(ta[(i+5 )*incA]);tmp += acc*acc;
7920 acc = cabsf(ta[(i+6 )*incA]);tmp += acc*acc;
7921 acc = cabsf(ta[(i+7 )*incA]);tmp += acc*acc;
7922 acc = cabsf(ta[(i+8 )*incA]);tmp += acc*acc;
7923 acc = cabsf(ta[(i+9 )*incA]);tmp += acc*acc;
7924 acc = cabsf(ta[(i+10 )*incA]);tmp += acc*acc;
7925 acc = cabsf(ta[(i+11 )*incA]);tmp += acc*acc;
7926 acc = cabsf(ta[(i+12 )*incA]);tmp += acc*acc;
7927 acc = cabsf(ta[(i+13 )*incA]);tmp += acc*acc;
7928 acc = cabsf(ta[(i+14 )*incA]);tmp += acc*acc;
7929 acc = cabsf(ta[(i+15 )*incA]);tmp += acc*acc;
7930 }
7931 for( ;i<n;++i){ acc = cabsf(ta[(i+0 )*incA]);tmp += acc*acc;
7932 }
7933 }
7934 ;
7935 tc[0] = crealf(csqrtf(tmp));
7936 }
7937 else
7938 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
7939 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
7940 if( type == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
7941 {
7942 const double complex*ta = a;double *tc = c,acc = ((double complex)(0)),tmp = ((double complex)(0));
7943 {
7944 for(i=0;i+15<n;i+=16){
7945 acc = cabs(ta[(i+0 )*incA]);tmp += acc*acc;
7946 acc = cabs(ta[(i+1 )*incA]);tmp += acc*acc;
7947 acc = cabs(ta[(i+2 )*incA]);tmp += acc*acc;
7948 acc = cabs(ta[(i+3 )*incA]);tmp += acc*acc;
7949 acc = cabs(ta[(i+4 )*incA]);tmp += acc*acc;
7950 acc = cabs(ta[(i+5 )*incA]);tmp += acc*acc;
7951 acc = cabs(ta[(i+6 )*incA]);tmp += acc*acc;
7952 acc = cabs(ta[(i+7 )*incA]);tmp += acc*acc;
7953 acc = cabs(ta[(i+8 )*incA]);tmp += acc*acc;
7954 acc = cabs(ta[(i+9 )*incA]);tmp += acc*acc;
7955 acc = cabs(ta[(i+10 )*incA]);tmp += acc*acc;
7956 acc = cabs(ta[(i+11 )*incA]);tmp += acc*acc;
7957 acc = cabs(ta[(i+12 )*incA]);tmp += acc*acc;
7958 acc = cabs(ta[(i+13 )*incA]);tmp += acc*acc;
7959 acc = cabs(ta[(i+14 )*incA]);tmp += acc*acc;
7960 acc = cabs(ta[(i+15 )*incA]);tmp += acc*acc;
7961 }
7962 for( ;i<n;++i){ acc = cabs(ta[(i+0 )*incA]);tmp += acc*acc;
7963 }
7964 }
7965 ;
7966 tc[0] = creal(csqrt(tmp));
7967 }
7968 else
7969 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
7970 return RSB_ERR_UNSUPPORTED_TYPE ;
7971 return RSB_ERR_NO_ERROR;
7972 }
7973
rsb__cblas_Xdotu_sub(rsb_type_t type,size_t n,const void * x,rsb_nnz_idx_t incx,const void * y,rsb_nnz_idx_t incy,void * dotu)7974 rsb_err_t rsb__cblas_Xdotu_sub(rsb_type_t type, size_t n, const void * x, rsb_nnz_idx_t incx, const void * y, rsb_nnz_idx_t incy, void *dotu){
7975 /*!
7976 * */
7977 return rsb__vector_mult_sum(x,y,dotu,type,n,incx,incy);
7978 }
7979
rsb__cblas_Xscal(rsb_type_t type,size_t n,const void * alphap,void * a,size_t stride)7980 rsb_err_t rsb__cblas_Xscal(rsb_type_t type, size_t n, const void * alphap, void * a, size_t stride){
7981 /*!
7982 * a <- a * alpha
7983 * */
7984 return rsb_strided_vector_scale(a,alphap,type,n,stride);
7985 }
7986
rsb__coo_insertion_sort(rsb_type_t typecode,void * VB,rsb_coo_idx_t * IB,rsb_coo_idx_t * JB,rsb_nnz_idx_t offA,rsb_nnz_idx_t nnzA)7987 rsb_err_t rsb__coo_insertion_sort(rsb_type_t typecode, void* VB, rsb_coo_idx_t * IB, rsb_coo_idx_t * JB, rsb_nnz_idx_t offA, rsb_nnz_idx_t nnzA)
7988 {
7989 /* only for *small* arrays, where allocation of a temporary array is not justified */
7990 rsb_coo_idx_t * IA = NULL, *JA = NULL;
7991 rsb_nnz_idx_t i, j;
7992
7993 IA = IB + offA;
7994 JA = JB + offA;
7995
7996 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
7997 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
7998 {
7999 double * VA = (double*) RSB_TYPED_OFF_PTR(typecode,VB,offA);
8000 for(i=1;i<nnzA;++i)
8001 for(j=i;j>0 && RSB_COO_LT(IA[j],JA[j],IA[j-1],JA[j-1]);--j)
8002 {
8003 RSB_SWAP(rsb_coo_idx_t,IA[j],IA[j-1]);
8004 RSB_SWAP(rsb_coo_idx_t,JA[j],JA[j-1]);
8005 RSB_SWAP(double ,VA[j],VA[j-1]);
8006 }
8007 }
8008 else
8009 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
8010 #ifdef RSB_NUMERICAL_TYPE_FLOAT
8011 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
8012 {
8013 float * VA = (float*) RSB_TYPED_OFF_PTR(typecode,VB,offA);
8014 for(i=1;i<nnzA;++i)
8015 for(j=i;j>0 && RSB_COO_LT(IA[j],JA[j],IA[j-1],JA[j-1]);--j)
8016 {
8017 RSB_SWAP(rsb_coo_idx_t,IA[j],IA[j-1]);
8018 RSB_SWAP(rsb_coo_idx_t,JA[j],JA[j-1]);
8019 RSB_SWAP(float ,VA[j],VA[j-1]);
8020 }
8021 }
8022 else
8023 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
8024 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
8025 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
8026 {
8027 float complex * VA = (float complex*) RSB_TYPED_OFF_PTR(typecode,VB,offA);
8028 for(i=1;i<nnzA;++i)
8029 for(j=i;j>0 && RSB_COO_LT(IA[j],JA[j],IA[j-1],JA[j-1]);--j)
8030 {
8031 RSB_SWAP(rsb_coo_idx_t,IA[j],IA[j-1]);
8032 RSB_SWAP(rsb_coo_idx_t,JA[j],JA[j-1]);
8033 RSB_SWAP(float complex ,VA[j],VA[j-1]);
8034 }
8035 }
8036 else
8037 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
8038 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
8039 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
8040 {
8041 double complex * VA = (double complex*) RSB_TYPED_OFF_PTR(typecode,VB,offA);
8042 for(i=1;i<nnzA;++i)
8043 for(j=i;j>0 && RSB_COO_LT(IA[j],JA[j],IA[j-1],JA[j-1]);--j)
8044 {
8045 RSB_SWAP(rsb_coo_idx_t,IA[j],IA[j-1]);
8046 RSB_SWAP(rsb_coo_idx_t,JA[j],JA[j-1]);
8047 RSB_SWAP(double complex ,VA[j],VA[j-1]);
8048 }
8049 }
8050 else
8051 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
8052 return RSB_ERR_UNSUPPORTED_TYPE ;
8053 return RSB_ERR_NO_ERROR;
8054 }
8055
rsb__coo_to_lr(void * RSB_RESTRICT VBu,rsb_coo_idx_t * RSB_RESTRICT IB,rsb_coo_idx_t * RSB_RESTRICT JB,void * RSB_RESTRICT VAu,rsb_coo_idx_t * RSB_RESTRICT IA,rsb_coo_idx_t * RSB_RESTRICT JA,rsb_coo_idx_t mj,rsb_nnz_idx_t nnzA,rsb_nnz_idx_t nzoffB,rsb_nnz_idx_t nzoffA,rsb_nnz_idx_t * RSB_RESTRICT nzlp,rsb_nnz_idx_t * RSB_RESTRICT nzrp,rsb_coo_idx_t iadd,rsb_coo_idx_t jadd,rsb_type_t typecode)8056 void rsb__coo_to_lr( void * RSB_RESTRICT VBu, rsb_coo_idx_t*RSB_RESTRICT IB, rsb_coo_idx_t*RSB_RESTRICT JB, void * RSB_RESTRICT VAu, rsb_coo_idx_t*RSB_RESTRICT IA, rsb_coo_idx_t*RSB_RESTRICT JA, rsb_coo_idx_t mj, rsb_nnz_idx_t nnzA, rsb_nnz_idx_t nzoffB, rsb_nnz_idx_t nzoffA, rsb_nnz_idx_t*RSB_RESTRICT nzlp, rsb_nnz_idx_t*RSB_RESTRICT nzrp, rsb_coo_idx_t iadd, rsb_coo_idx_t jadd, rsb_type_t typecode)
8057 {
8058 /*
8059 * Given COO arrays matrices A an (temporary) B, stores the coefficients left of the mj-th column before the one coming after it, respecting the row major ordering.
8060 * A serial function.
8061 * */
8062 rsb_nnz_idx_t nzl = 0, nzr = 0, nzi = 0;
8063
8064 RSB_DEBUG_ASSERT(IA!=IB);
8065 RSB_DEBUG_ASSERT(JA!=JB);
8066 RSB_DEBUG_ASSERT(mtxAp);
8067
8068 IA += nzoffA;
8069 JA += nzoffA;
8070
8071 IB += nzoffB;
8072 JB += nzoffB;
8073
8074 switch(typecode)
8075 {
8076 /* supported (double,float,float complex,double complex) */
8077 case RSB_NUMERICAL_TYPE_DOUBLE :
8078 {
8079 double * VA = VAu;
8080 double * VB = VBu;
8081 RSB_DEBUG_ASSERT(VA!=VB);
8082
8083 VA += nzoffA;
8084 VB += nzoffB;
8085
8086 for(nzi=0;nzi<nnzA;++nzi)
8087 {
8088 if( JA[nzi] < mj )
8089 {
8090 IB[nzl] = IA[nzi] + iadd;
8091 JB[nzl] = JA[nzi] ;
8092 VB[nzl] = VA[nzi];
8093 nzl++;
8094 }
8095 else
8096 {
8097 nzr++;
8098 IB[nnzA-nzr] = IA[nzi] + iadd;
8099 JB[nnzA-nzr] = JA[nzi] + jadd;
8100 VB[nnzA-nzr] = VA[nzi];
8101 }
8102 }
8103
8104 /* copy left quadrant back to A */
8105 for(nzi=0;nzi<nzl ;++nzi)
8106 {
8107 IA[nzi] = IB[nzi];
8108 JA[nzi] = JB[nzi];
8109 VA[nzi] = VB[nzi];
8110 }
8111
8112 /* copy right quadrant back to A */
8113 for( ;nzi<nnzA;++nzi)
8114 {
8115 IA[nzi] = IB[nnzA-(1+nzi-nzl)];
8116 JA[nzi] = JB[nnzA-(1+nzi-nzl)];
8117 VA[nzi] = VB[nnzA-(1+nzi-nzl)];
8118 }
8119 }
8120 break;
8121 case RSB_NUMERICAL_TYPE_FLOAT :
8122 {
8123 float * VA = VAu;
8124 float * VB = VBu;
8125 RSB_DEBUG_ASSERT(VA!=VB);
8126
8127 VA += nzoffA;
8128 VB += nzoffB;
8129
8130 for(nzi=0;nzi<nnzA;++nzi)
8131 {
8132 if( JA[nzi] < mj )
8133 {
8134 IB[nzl] = IA[nzi] + iadd;
8135 JB[nzl] = JA[nzi] ;
8136 VB[nzl] = VA[nzi];
8137 nzl++;
8138 }
8139 else
8140 {
8141 nzr++;
8142 IB[nnzA-nzr] = IA[nzi] + iadd;
8143 JB[nnzA-nzr] = JA[nzi] + jadd;
8144 VB[nnzA-nzr] = VA[nzi];
8145 }
8146 }
8147
8148 /* copy left quadrant back to A */
8149 for(nzi=0;nzi<nzl ;++nzi)
8150 {
8151 IA[nzi] = IB[nzi];
8152 JA[nzi] = JB[nzi];
8153 VA[nzi] = VB[nzi];
8154 }
8155
8156 /* copy right quadrant back to A */
8157 for( ;nzi<nnzA;++nzi)
8158 {
8159 IA[nzi] = IB[nnzA-(1+nzi-nzl)];
8160 JA[nzi] = JB[nnzA-(1+nzi-nzl)];
8161 VA[nzi] = VB[nnzA-(1+nzi-nzl)];
8162 }
8163 }
8164 break;
8165 case RSB_NUMERICAL_TYPE_FLOAT_COMPLEX :
8166 {
8167 float complex * VA = VAu;
8168 float complex * VB = VBu;
8169 RSB_DEBUG_ASSERT(VA!=VB);
8170
8171 VA += nzoffA;
8172 VB += nzoffB;
8173
8174 for(nzi=0;nzi<nnzA;++nzi)
8175 {
8176 if( JA[nzi] < mj )
8177 {
8178 IB[nzl] = IA[nzi] + iadd;
8179 JB[nzl] = JA[nzi] ;
8180 VB[nzl] = VA[nzi];
8181 nzl++;
8182 }
8183 else
8184 {
8185 nzr++;
8186 IB[nnzA-nzr] = IA[nzi] + iadd;
8187 JB[nnzA-nzr] = JA[nzi] + jadd;
8188 VB[nnzA-nzr] = VA[nzi];
8189 }
8190 }
8191
8192 /* copy left quadrant back to A */
8193 for(nzi=0;nzi<nzl ;++nzi)
8194 {
8195 IA[nzi] = IB[nzi];
8196 JA[nzi] = JB[nzi];
8197 VA[nzi] = VB[nzi];
8198 }
8199
8200 /* copy right quadrant back to A */
8201 for( ;nzi<nnzA;++nzi)
8202 {
8203 IA[nzi] = IB[nnzA-(1+nzi-nzl)];
8204 JA[nzi] = JB[nnzA-(1+nzi-nzl)];
8205 VA[nzi] = VB[nnzA-(1+nzi-nzl)];
8206 }
8207 }
8208 break;
8209 case RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX :
8210 {
8211 double complex * VA = VAu;
8212 double complex * VB = VBu;
8213 RSB_DEBUG_ASSERT(VA!=VB);
8214
8215 VA += nzoffA;
8216 VB += nzoffB;
8217
8218 for(nzi=0;nzi<nnzA;++nzi)
8219 {
8220 if( JA[nzi] < mj )
8221 {
8222 IB[nzl] = IA[nzi] + iadd;
8223 JB[nzl] = JA[nzi] ;
8224 VB[nzl] = VA[nzi];
8225 nzl++;
8226 }
8227 else
8228 {
8229 nzr++;
8230 IB[nnzA-nzr] = IA[nzi] + iadd;
8231 JB[nnzA-nzr] = JA[nzi] + jadd;
8232 VB[nnzA-nzr] = VA[nzi];
8233 }
8234 }
8235
8236 /* copy left quadrant back to A */
8237 for(nzi=0;nzi<nzl ;++nzi)
8238 {
8239 IA[nzi] = IB[nzi];
8240 JA[nzi] = JB[nzi];
8241 VA[nzi] = VB[nzi];
8242 }
8243
8244 /* copy right quadrant back to A */
8245 for( ;nzi<nnzA;++nzi)
8246 {
8247 IA[nzi] = IB[nnzA-(1+nzi-nzl)];
8248 JA[nzi] = JB[nnzA-(1+nzi-nzl)];
8249 VA[nzi] = VB[nnzA-(1+nzi-nzl)];
8250 }
8251 }
8252 break;
8253 /* unsupported type */
8254 default :
8255 RSB_NULL_STATEMENT_FOR_COMPILER_HAPPINESS
8256 }
8257
8258 *nzlp = nzl;
8259 *nzrp = nzr;
8260 }
8261 #ifdef __cplusplus
8262 }
8263 #endif /* __cplusplus */
8264
8265 /* @endcond */
8266