1 /*
2 
3 Copyright (C) 2008-2021 Michele Martone
4 
5 This file is part of librsb.
6 
7 librsb is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11 
12 librsb is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 License for more details.
16 
17 You should have received a copy of the GNU Lesser General Public
18 License along with librsb; see the file COPYING.
19 If not, see <http://www.gnu.org/licenses/>.
20 
21 */
22 /* @cond INNERDOC  */
23 /**
24  * @file
25  * @author Michele Martone
26  * @brief
27  *
28  * Low level routines and tools for our sparse matrix formats implementations.
29  * \internal
30  *
31  * */
32 #include "rsb_common.h"
33 #include "rsb_util.h"
34 #include "rsb.h"
35 #include "rsb_types.h"
36 #include "rsb_unroll.h"
37 #ifdef RSB_HAVE_SYS_UTSNAME_H
38 #include <sys/utsname.h>	/* uname */
39 #endif /* RSB_HAVE_SYS_UTSNAME_H  */
40 
41 #define RSB_WANT_NULL_ALLOCATED_ZERO_NNZ_COO_MATRICES_ARRAYS 1 /* 20110419 a bugfix for the nnz == 0 case vs realloc and memory counters */
42 #define RSB_TOKLEN 16
43 #define RSB_WANT_ZERO_ON_DESTROY 0	/* a useful debug option */
44 #define rsb__strcpy strcpy
45 
46 #define RSB_ILLEGAL_FLAGS	0xFFFFFFFF
47 #define RSB_WANT_DOUBLE_MATRIX_FREE_DETECT 0
48 
49 #if RSB_WANT_DOUBLE_MATRIX_FREE_DETECT
50 #define RSB_CONDITIONAL_FREE_MTXAP(MTXAP) if(MTXAP){ if( (MTXAP)->flags == RSB_ILLEGAL_FLAGS ) { RSB_ERROR("Probable attempt to free already freed matrix at %p !\n",(MTXAP)); } (MTXAP)->flags = RSB_ILLEGAL_FLAGS; RSB_CONDITIONAL_FREE(MTXAP); }
51 #else
52 #define RSB_CONDITIONAL_FREE_MTXAP(MTXAP) RSB_CONDITIONAL_FREE(MTXAP)
53 #endif
54 
55 RSB_INTERNALS_COMMON_HEAD_DECLS
56 
void * rsb__init_options_t(struct rsb_options_t *o)
{
	/*!
	 * \ingroup gr_internals
	 * Resets an options struct to its default (all-zero) state.
	 * \param o pointer to the struct to initialize (may be NULL).
	 * \return the input address (NULL if o was NULL).
	 */
	if(o)
		RSB_BZERO_P(o);
	return o;
}
70 
rsb__destroy_options_t(struct rsb_options_t * o)71 void * rsb__destroy_options_t(struct rsb_options_t *o)
72 {
73 	/*!
74 	 * \ingroup gr_internals
75 	 * frees the given options structure and any of its allocated arrays
76 	 *
77 	 * \return the input address
78 	 */
79 	if(!o)
80 		goto err;
81 	RSB_CONDITIONAL_FREE(o->bitmap);
82 	RSB_CONDITIONAL_FREE(o);
83 err:
84 	return o;
85 }
86 
const void * rsb__is_valid_options_t(const struct rsb_options_t *o, rsb_coo_idx_t m, rsb_coo_idx_t k)
{
	/*!
	 * \ingroup gr_internals
	 * Checks whether the struct members to be used for matrix creation
	 * carry meaningful values (currently: non-NULL struct, valid indices).
	 *
	 * \param o the options struct to validate.
	 * \param m row count to validate.
	 * \param k column count to validate.
	 * \return the input address on success, NULL otherwise.
	 */
	if(o == NULL)
		return NULL;
	if(RSB_INVALID_COO_INDEX(m))
		return NULL;
	if(RSB_INVALID_COO_INDEX(k))
		return NULL;
	/*
	 * someday, for an already-initialized matrix, deeper checks could go
	 * here (el_size >= 1, bitmap presence, ...).
	 */
	return o;
}
114 
void * rsb__reallocate_coo_matrix_t(struct rsb_coo_matrix_t *cmp, rsb_nnz_idx_t nnnz)
{
	/*!
	 * \ingroup gr_internals
	 * Resizes the IA/JA/VA arrays of a COO matrix to hold nnnz nonzeroes,
	 * updating cmp->nnz on success.
	 * \param cmp  the COO matrix whose arrays are resized (typecode must be valid).
	 * \param nnnz the new nonzeroes count.
	 * \return On success, return the input address; on failure, NULL.
	 */
	size_t es = 0;		/* element size in bytes, derived from typecode */
	void * check = NULL;	/* realloc result, checked before overwriting the old pointer */

	if(!cmp)
		goto err;

        es = RSB_NUMERICAL_TYPE_SIZE(cmp->typecode);

	if(es < 1)
		goto err;	/* unknown/invalid typecode */

	/* nnz == 0: skip realloc entirely (realloc with size 0 is implementation-defined)
	 * and just blank the pointers.
	 * NOTE(review): the old IA/JA/VA pointers are discarded without freeing here —
	 * presumably the caller (or the memory counters bugfix of 20110419 mentioned at
	 * the macro definition) accounts for them; verify against callers. */
	if( nnnz == 0 && RSB_WANT_NULL_ALLOCATED_ZERO_NNZ_COO_MATRICES_ARRAYS )
	{
		cmp->IA = NULL;
		cmp->JA = NULL;
		cmp->VA = NULL;
		goto done;
	}

	/* each array is updated only after its realloc succeeds, so a failure
	 * leaves cmp internally consistent (original arrays still valid) */
	check = rsb__realloc(cmp->IA,sizeof(rsb_coo_idx_t)*(nnnz));
	if(!check)
		goto err;
	cmp->IA = check;

	check = rsb__realloc(cmp->JA,sizeof(rsb_coo_idx_t)*(nnnz));
	if(!check)
		goto err;
	cmp->JA = check;

	check = rsb__realloc(cmp->VA,es*nnnz);
	if(!check)
		goto err;
	cmp->VA = check;

	if(!cmp->IA || !cmp->JA || !cmp->VA)
		goto cerr;
done:
	cmp->nnz = nnnz;
	return cmp;
cerr:
	/* critical error (should not happen): all three reallocs succeeded but
	 * a pointer is NULL anyway; free everything to avoid leaks */
	if(cmp)
	{
		RSB_CONDITIONAL_FREE(cmp->IA);
		RSB_CONDITIONAL_FREE(cmp->JA);
		RSB_CONDITIONAL_FREE(cmp->VA);
	}
err:
	return NULL;
}
171 
rsb__callocate_coo_matrix_t(struct rsb_coo_matrix_t * cmp)172 void * rsb__callocate_coo_matrix_t(struct rsb_coo_matrix_t *cmp)
173 {
174 	return rsb__xallocate_coo_matrix_t(cmp,RSB_BOOL_TRUE,RSB_FLAG_NOFLAGS);
175 }
176 
rsb__allocate_coo_matrix_t(struct rsb_coo_matrix_t * cmp)177 void * rsb__allocate_coo_matrix_t(struct rsb_coo_matrix_t *cmp)
178 {
179 	return rsb__xallocate_coo_matrix_t(cmp,RSB_BOOL_FALSE,RSB_FLAG_NOFLAGS);
180 }
181 
void * rsb__xallocate_coo_matrix_t(struct rsb_coo_matrix_t *cmp, rsb_bool_t want_calloc, rsb_flags_t flags)
{
	/*!
	 * \ingroup gr_internals
	 * Allocates the IA/JA/VA arrays of a COO matrix, sized after cmp->nnz.
	 * \param cmp         struct with nnz, nr and typecode already set; its
	 *                    IA/JA/VA pointers are overwritten.
	 * \param want_calloc RSB_BOOL_TRUE to zero-initialize the arrays.
	 * \param flags       with RSB_FLAG_DEFAULT_CSR_MATRIX_FLAGS, IA is sized
	 *                    max(nnz,nr+1) so it can later host CSR row pointers.
	 * \return the input address on success, NULL on error.
	 */
	size_t es = 0, nnz = 0, rnz = 0;

	if(!cmp)
	{
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}
	rnz = nnz = cmp->nnz;
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_DEFAULT_CSR_MATRIX_FLAGS))
		rnz = RSB_MAX(nnz,cmp->nr+1);	/* room for CSR row pointer array */

        es = RSB_NUMERICAL_TYPE_SIZE(cmp->typecode);

	if(es<1)
	{
		/* was "0%x": missing 'x' made the hex prefix print as a bare zero */
		RSB_ERROR("typecode seems wrong: 0x%x\n",cmp->typecode);
		RSB_PERR_GOTO(err,RSB_ERRM_WTC);
	}
	/* blank the pointers first, so the error path can free them unconditionally */
	cmp->IA = NULL;
	cmp->JA = NULL;
	cmp->VA = NULL;

	/* braced blocks instead of the original comma-operator statements */
	if(want_calloc == RSB_BOOL_TRUE)
	{
		cmp->IA = rsb__calloc(sizeof(rsb_coo_idx_t)*(rnz));
		cmp->JA = rsb__calloc(sizeof(rsb_coo_idx_t)*(nnz));
		cmp->VA = rsb__calloc_vector(nnz, cmp->typecode);
	}
	else
	{
		cmp->IA = rsb__malloc(sizeof(rsb_coo_idx_t)*(rnz));
		cmp->JA = rsb__malloc(sizeof(rsb_coo_idx_t)*(nnz));
		cmp->VA = rsb__malloc_vector(nnz, cmp->typecode);
	}

	if(!cmp->IA || !cmp->JA || !cmp->VA)
	{
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}
	/*
	 * Note: we do not free cmp itself.
	 */
	return cmp;
err:
	if(cmp)
	{
		RSB_CONDITIONAL_FREE(cmp->IA);
		RSB_CONDITIONAL_FREE(cmp->JA);
		RSB_CONDITIONAL_FREE(cmp->VA);
	}
	return NULL;
}
236 
rsb__destroy_coo_matrix_t(struct rsb_coo_matrix_t * cmp)237 void * rsb__destroy_coo_matrix_t(struct rsb_coo_matrix_t *cmp)
238 {
239 	/*!
240 	 * \ingroup gr_internals
241 	 * frees the given structure allocated arrays
242 	 *
243 	 * \return the input address
244 	 */
245 	if(!cmp)
246 		return cmp;
247 	RSB_CONDITIONAL_FREE(cmp->IA);
248 	RSB_CONDITIONAL_FREE(cmp->JA);
249 	RSB_CONDITIONAL_FREE(cmp->VA);
250 	/* RSB_BZERO_P(cmp); */
251 	/*
252 	 * Note: we do not free cmp itself.
253 	 */
254 	return cmp;
255 }
256 
rsb__transpose_coo_matrix_t(struct rsb_coo_matrix_t * cmp)257 void * rsb__transpose_coo_matrix_t(struct rsb_coo_matrix_t *cmp)
258 {
259 	/*!
260 	 * \ingroup gr_internals
261 	 * transposes symbolically the given matrix
262 	 *
263 	 * \return the input address
264 	 */
265 	if(!cmp)
266 		return cmp;
267 	RSB_SWAP(rsb_coo_idx_t ,cmp->nr, cmp->nc );
268 	RSB_SWAP(rsb_coo_idx_t*,cmp->IA,cmp->JA);
269 	return cmp;
270 }
271 
rsb__init_blank_pointers(struct rsb_mtx_t * mtxAp)272 void * rsb__init_blank_pointers(struct rsb_mtx_t *mtxAp)
273 {
274 	/*!
275 	 * \ingroup gr_internals
276 	 * \return the input address
277 	 * */
278 	if(!mtxAp)
279 		return mtxAp;
280 
281 	mtxAp->VA = NULL;
282 	mtxAp->indptr = NULL;
283 	mtxAp->bindx = NULL;
284 	mtxAp->rpntr = NULL;
285 	mtxAp->cpntr = NULL;
286 	mtxAp->bpntr = NULL;
287 #if RSB_WANT_BITMAP
288 	mtxAp->options = NULL;		/* exactly ... */
289 #endif /* RSB_WANT_BITMAP */
290 	mtxAp->mpntr = NULL;
291 	mtxAp->Mpntr = NULL;
292 	mtxAp->all_leaf_matrices = NULL;
293 	mtxAp->sm[0] = NULL;
294 	mtxAp->sm[1] = NULL;
295 	mtxAp->sm[2] = NULL;
296 	mtxAp->sm[3] = NULL;
297 
298 	return mtxAp;
299 }
300 
rsb__fill_struct(struct rsb_mtx_t * mtxAp,void * VA,rsb_coo_idx_t * IA,rsb_coo_idx_t * JA,rsb_coo_idx_t m,rsb_coo_idx_t k,rsb_type_t typecode,rsb_flags_t flags)301 rsb_err_t rsb__fill_struct(struct rsb_mtx_t *mtxAp, void * VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_type_t typecode, rsb_flags_t flags)
302 {
303 	/*!
304 	 * \ingroup gr_internals
305 	 * initializes a rsb_mtx_t struct to default 'vanilla' values
306 	 * \return the input address
307 	 * FIXME: rsb__fill_struct -> rsb__mtx_init
308 	 */
309 
310 	if(!mtxAp)
311 		return RSB_ERR_GENERIC_ERROR;
312 
313 	rsb__init_struct(mtxAp);/* redundant ?*/
314 	mtxAp->VA = VA;
315 	mtxAp->bindx = JA;
316 	mtxAp->bpntr = IA;
317 	mtxAp->nr = m;
318 	mtxAp->nc = k;
319 	mtxAp->flags = flags;
320 	mtxAp->typecode = typecode;
321 
322 	return RSB_ERR_NO_ERROR;
323 }
324 
void * rsb__fill_coo_struct(struct rsb_coo_matrix_t *mtxAp, void * VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_nnz_idx_t nnz, rsb_type_t typecode)
{
	/*!
	 * \ingroup gr_internals
	 * Installs the given arrays, sizes and typecode into a COO struct.
	 * No allocation or copying takes place.
	 * \return the input address, or NULL if mtxAp is NULL.
	 */
	if(mtxAp == NULL)
		return NULL;

	mtxAp->VA = VA;
	mtxAp->IA = IA;
	mtxAp->JA = JA;
	mtxAp->nr = m;
	mtxAp->nc = k;
	mtxAp->nnz = nnz;
	mtxAp->typecode = typecode;

	return mtxAp;
}
338 
rsb__init_struct(struct rsb_mtx_t * mtxAp)339 void * rsb__init_struct(struct rsb_mtx_t *mtxAp)
340 {
341 	/*!
342 	 * \ingroup gr_internals
343 	 * initializes a rsb_mtx_t struct to default 'vanilla' values
344 	 * \return the input address
345 	 * */
346 	if(!mtxAp)
347 		return mtxAp;
348 
349 	rsb__init_blank_pointers(mtxAp);
350 
351 	mtxAp->flags = RSB_FLAG_NOFLAGS ;
352 
353 	mtxAp->sat = RSB_TIME_ZERO;
354 	mtxAp->eit = RSB_TIME_ZERO;
355 	mtxAp->pet = RSB_TIME_ZERO;
356 	mtxAp->est = RSB_TIME_ZERO;
357 	mtxAp->tat = RSB_TIME_ZERO;
358 	mtxAp->cpt = RSB_TIME_ZERO;
359 	mtxAp->rpt = RSB_TIME_ZERO;
360 
361 	return mtxAp;
362 }
363 
void * rsb__destroy_inner(struct rsb_mtx_t *mtxAp)
{
	/*!
	 * \ingroup gr_internals
	 * \input mtxAp is a pointer to a valid matrix structure.
	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
	 *
	 * Deallocates the guts of a sparse matrix.
	 * (will leave the struct in an inconsistent state)
	 *
	 * Always returns NULL. Two distinct teardown paths exist:
	 * - matrices assembled in-place in three COO arrays (special-cased below);
	 * - ordinary (possibly recursive) matrices, whose submatrices are freed
	 *   first and then the per-struct arrays.
	 */
	rsb_submatrix_idx_t i,j;
	struct rsb_mtx_t * submatrix = NULL;

	//RSB_STDOUT("destroying matrix %p\n",mtxAp);

	if(!mtxAp)
		goto ret;

	/* Special case: the whole recursive matrix lives inside three shared
	 * arrays (VA/IA/JA); only the root performs deallocation, and only of
	 * those shared arrays, not of the individual submatrix structs. */
	if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_ASSEMBLED_IN_COO_ARRAYS))
	{
		if(rsb__is_root_matrix(mtxAp))
		{
			/* FIXME: unfinished, temporary */
			/* this is a trick: fitting the whole recursive matrix in three arrays */
			void *IA = NULL,*JA = NULL,*VA = NULL;
			rsb_bool_t is_bio = rsb__do_is_matrix_binary_loaded(mtxAp); // binary I/O matrix
			struct rsb_mtx_t *fsm = rsb__do_get_first_submatrix(mtxAp);
			rsb_flags_t flags = mtxAp->flags;
			/* flags are copied BEFORE mtxAp may be freed below */

			if(!is_bio)
				RSB_CONDITIONAL_FREE(mtxAp->all_leaf_matrices)	// ?!
			/* the first (leaf) submatrix holds the shared array base pointers */
			JA = fsm->bindx;
			VA = fsm->VA;
			if(!is_bio)
			{
				//IA = fsm->bpntr-(fsm->nr+1);
				IA = fsm->bpntr;
			}
			else
				/* binary-loaded: the whole blob starts at the struct itself */
				IA = mtxAp;
			if(!is_bio)
				RSB_CONDITIONAL_FREE_MTXAP(mtxAp)// extra allocation
//			RSB_INFO("VA:%p, IA:%p, JA:%p\n",VA,IA,JA);
			if(!RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
			{
				RSB_CONDITIONAL_FREE(IA);/* these arrays are allowed to be NULL, as it happens during conversions */
				RSB_CONDITIONAL_FREE(JA);
				RSB_CONDITIONAL_FREE(VA);
			}
		}
		return NULL;/* no deallocation, in this case */
	}

	/* recursive case: free children first (rsb__do_mtx_free recurses back here) */
	if(rsb__is_recursive_matrix(mtxAp->flags))
	RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
	{
		if(submatrix)
		{
			rsb__do_mtx_free(submatrix);
		}
	}

	/* per-struct arrays: freed only when owned by the matrix */
	if(!(mtxAp->flags & RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR))
	if(!RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
	{
		RSB_CONDITIONAL_FREE(mtxAp->VA);
		RSB_CONDITIONAL_FREE(mtxAp->bindx);
		RSB_CONDITIONAL_FREE(mtxAp->bpntr);
	}

	RSB_CONDITIONAL_FREE(mtxAp->indptr);

#if RSB_WANT_BITMAP
	if(mtxAp->options)
		rsb__destroy_options_t(mtxAp->options);
#endif /* RSB_WANT_BITMAP */

	if((mtxAp->flags & RSB_FLAG_OWN_PARTITIONING_ARRAYS)!=0)
	{
		RSB_CONDITIONAL_FREE(mtxAp->rpntr);
		RSB_CONDITIONAL_FREE(mtxAp->cpntr);
	}
#if RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS
	RSB_CONDITIONAL_FREE(mtxAp->all_leaf_matrices);
#endif /* RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS */
	RSB_BZERO_P(mtxAp);/* this enforces correct usage */
ret:
	return NULL;
}
453 
rsb__do_get_first_submatrix(const struct rsb_mtx_t * mtxAp)454 struct rsb_mtx_t * rsb__do_get_first_submatrix(const struct rsb_mtx_t *mtxAp)
455 {
456 	/*!
457 	 * \ingroup gr_internals
458 	 * */
459 	if(!mtxAp)
460 		return NULL;
461 
462 	if(rsb__is_recursive_matrix(mtxAp->flags))
463 	{
464 		rsb_submatrix_idx_t i,j;
465 		struct rsb_mtx_t * submatrix;
466 		RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
467 		if(submatrix)
468 			return rsb__do_get_first_submatrix(submatrix);
469 	}
470 	return (struct rsb_mtx_t*)mtxAp;/* FIXME */
471 }
472 
void * rsb__do_mtx_free(struct rsb_mtx_t *mtxAp)
{
	/*!
	 * \ingroup gr_internals
	 * \param mtxAp a pointer to a matrix structure
	 *
	 * Will destroy a valid matrix and deallocate all of its allocated data.
	 *
	 * NOTE(review): the returned value is the input pointer, which on most
	 * paths has already been freed (i.e. it is dangling); callers should
	 * treat it as an opaque token, not dereference it.
	 * */
	rsb_flags_t flags;

	if(!mtxAp)
		goto ret;

	/* matrices backed by a sparse-BLAS handle are released via that handle */
	if( RSB_MTX_HBDF( mtxAp ) )
	{
		blas_sparse_matrix bmtxA = RSB_MTX_HBDFH(mtxAp);
		rsb__BLAS_Xusds(bmtxA);
		RSB_CONDITIONAL_FREE_MTXAP(mtxAp);
		goto ret;
	}

	/* flags must be saved before rsb__destroy_inner() zeroes/frees the struct */
	flags = mtxAp->flags;

	rsb__destroy_inner(mtxAp);

	/* COO-assembled matrices are freed inside rsb__destroy_inner(); only
	 * the ordinary case frees the struct here */
	if(!RSB_DO_FLAG_HAS(flags,RSB_FLAG_ASSEMBLED_IN_COO_ARRAYS))
	{
		if(mtxAp && RSB_WANT_ZERO_ON_DESTROY)
		{
			RSB_BZERO_P(mtxAp);
		}
		RSB_CONDITIONAL_FREE_MTXAP(mtxAp);
	}
ret:
	return mtxAp;
}
509 
510 
511 #if RSB_WANT_BITMAP
rsb__get_sizeof_options(const struct rsb_options_t * o,rsb_blk_idx_t M_b,rsb_blk_idx_t K_b)512 static size_t rsb__get_sizeof_options(const struct rsb_options_t *o, rsb_blk_idx_t M_b, rsb_blk_idx_t K_b)
513 {
514 	/*!
515 	 * \ingroup gr_internals
516 	 * \return memory usage
517 	 * \param o a pointer to a valid rsb_options_t structure
518 	 *
519 	 * \return the amount of memory allocated for this structure, deeply
520 	 * */
521 	size_t count = 0;
522 
523 	if(!o )
524 		return 0;
525 
526 	count += sizeof(*o);
527 
528 	/* we allocate a new options structure */
529 	if(o->bitmap)
530 		count += RSB_BYTES_PER_BITMAP(M_b,K_b) ;
531 
532 	return count;
533 }
534 #endif /* RSB_WANT_BITMAP */
535 
size_t rsb__get_sizeof(const struct rsb_mtx_t *mtxAp )
{
	/*!
	 * \ingroup gr_internals
	 * \param mtxAp a pointer to a valid rsb_mtx_t structure
	 * \return the amount of memory allocated for this structure, deeply (indices + coefficients).
	 *
	 * Recurses over submatrices; terminal (leaf) matrices additionally
	 * account for their index and value arrays, with the exact formula
	 * depending on format (COO vs CSR) and index width (half vs full word).
	 */
	size_t count = 0;
	struct rsb_mtx_t * submatrix = NULL;
	rsb_submatrix_idx_t i,j;
	rsb_bool_t istrec = RSB_BOOL_FALSE;	/* is this a terminal (leaf) matrix? */

	if(!mtxAp )
		goto err;

	istrec = rsb__is_terminal_recursive_matrix(mtxAp);

	count += sizeof(*mtxAp);

#if RSB_WANT_BITMAP
	if(mtxAp->options)
		count += rsb__get_sizeof_options(mtxAp->options,mtxAp->M_b,mtxAp->K_b);
	else
		return count;	/* NOTE(review): without options, array sizes are skipped entirely */
#endif /* RSB_WANT_BITMAP */
	/* we allocate a new options structure */
	if( mtxAp->rpntr	) count += sizeof(rsb_coo_idx_t)*(mtxAp->M_b+1);
	if( mtxAp->cpntr	) count += sizeof(rsb_coo_idx_t)*(mtxAp->K_b+1);
	if(istrec)
	{
#if RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT
	/* COO: two index arrays sized nnz; CSR: one index array sized nnz plus nr+1 row pointers */
	if(rsb__is_coo_matrix(mtxAp))
	{
		if( mtxAp->flags & RSB_FLAG_USE_HALFWORD_INDICES)
			count += sizeof(rsb_half_idx_t)*(mtxAp->nnz)*2;
		else
			count += sizeof(rsb_coo_idx_t)*(mtxAp->nnz)*2;
	}
	else
	{
		if( mtxAp->flags & RSB_FLAG_USE_HALFWORD_INDICES)
			count += sizeof(rsb_half_idx_t)*(mtxAp->nnz)+sizeof(rsb_nnz_idx_t)*(mtxAp->nr+1);
		else
			count += sizeof(rsb_coo_idx_t)*(mtxAp->nnz)+sizeof(rsb_nnz_idx_t)*(mtxAp->nr+1);
	}
	if( mtxAp->VA  	) count += mtxAp->el_size*mtxAp->nnz;
	/* FIXME: missing the amount of memory allocated as extra submatrices for root, and the redundant structs array */
#else /* RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT */
	if( mtxAp->bindx	) count += sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1);
	if( mtxAp->indptr	) count += sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1);
	if( mtxAp->bpntr	) count += sizeof(rsb_nnz_idx_t)*(mtxAp->Mdim+1);
#if RSB_WANT_BITMAP
	if( mtxAp->VA  	) count += ( RSB_TOTAL_BLOCK_BYTES(mtxAp,mtxAp->options) );
#endif /* RSB_WANT_BITMAP */
#endif /* RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT */
	}
	/* accumulate over the quadtree children */
	RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
		if(submatrix)
			count += rsb__get_sizeof(submatrix);

err:
	return count;
}
599 
rsb__nnz_coord_compar(const void * key,const void * am)600 int rsb__nnz_coord_compar(const void *key, const void *am)
601 {
602 	/*!
603 	 * \ingroup gr_internals
604 	 * A service function. NOTE:
605 	 *
606 	 * Please note that the sole use of this function is the major bottleneck during matrix creation.
607 	 * When thinking about optimizing matrix creation, come back here: this routine eats up to 90%
608 	 * of the time required for matrix creation.
609 	 * */
610 	register const rsb_coo_idx_t *iam = am,*ik = key;
611 	/*!
612 	 * this function is used as compar in for stdlib's bsearch
613 	 * on the ordered arrays p_r and p_c, and will return :
614 	 * -1, 0, or 1
615 	 *  respectively if the key element is :
616 	 * less than both, in between, or greater than both *am and am[1].
617 	 * */
618 	return (*ik < iam[1] )? (*ik<*iam?-1:0):1;
619 }
620 
rsb__allocate_bitmap(rsb_blk_idx_t rows,rsb_blk_idx_t cols)621 rsb_bitmap_data_t * rsb__allocate_bitmap(rsb_blk_idx_t rows, rsb_blk_idx_t cols)
622 {
623 	/*!
624 	 * \ingroup gr_internals
625 	 * \param rows the amount of rows the bitmap should have
626 	 * \param rows the amount of columns the bitmap should have
627 	 * \return the bitmap area
628 	 *
629 	 * Allocates an area of (((cols+sizeof(rsb_bitmap_data_t)-1))/sizeof(rsb_bitmap_data_t) * rows) bytes
630 	 * to use as a bitmap, through the  RSB_BITMAP_SET
631 	 * and  RSB_BITMAP_GET macros.
632 	 *
633 	 * This bitmap takes ( ceil(cols/sizeof(rsb_bitmap_data_t))*sizeof(rsb_bitmap_data_t)*rows ) bytes of memory.
634 	 * it should be roughly 1 bit for block of data
635 	 * or at worst
636 	 * ((cols/(sizeof(rsb_bitmap_data_t)*8))+1/(sizeof(rsb_bitmap_data_t)*8))/cols bits for every data block.
637 	 *
638 	 * The bitmap is return set to zero.
639 	 * assumes sizeof(rsb_bitmap_data_t)>1
640 	 * */
641 	if( RSB_INVALID_COO_INDEX(rows) || RSB_INVALID_COO_INDEX(cols))
642 		return NULL;
643 	return rsb__calloc(RSB_BYTES_PER_BITMAP(rows,cols));
644 }
645 
rsb__allocate_bitvector(rsb_blk_idx_t nbits)646 rsb_bitmap_data_t * rsb__allocate_bitvector(rsb_blk_idx_t nbits)
647 {
648 	/*
649 	 * \ingroup gr_internals
650 	 * Allocates an array for \c nbits  bits, set to zero.
651 	 * */
652 #ifdef RSB_BITMAP_ROW_MAJOR_ORDER
653 	return rsb__allocate_bitmap(nbits,1);
654 #else /* RSB_BITMAP_ROW_MAJOR_ORDER */
655 	return rsb__allocate_bitmap(1,nbits);
656 #endif /* RSB_BITMAP_ROW_MAJOR_ORDER */
657 }
658 
rsb__bitmap_bit_count(const rsb_bitmap_data_t * bitmap,const rsb_blk_idx_t rows,const rsb_blk_idx_t cols)659 rsb_blk_idx_t rsb__bitmap_bit_count(const rsb_bitmap_data_t *bitmap, const rsb_blk_idx_t rows, const rsb_blk_idx_t cols)
660 {
661 	/*!
662 	 * \ingroup gr_internals
663 	 * \param bitmap is the bitmap data pointer
664 	 * \param rows is the number of rows the bitmap was allocated for
665 	 * \param cols is the number of columns the bitmap was allocated for
666 	 * \return -1 in case of error, the set bit count otherwise
667 	 *
668 	 * This function counts the bits in a bitmap
669 	 * Note that it is not dependent on the internal bitmap storage scheme,
670 	 * (column major or row major), but only as long as the bit pool is uniform.
671 	 *
672 	 * TODO : this is not core functionality, so it could be moved somewhere else.
673 	 * */
674 	register rsb_blk_idx_t i,bc = 0;
675 	register rsb_bitmap_data_t w;
676 	if(!bitmap)
677 		return RSB_ERR_BADARGS;
678 	for(i=0;i<((rsb_blk_idx_t)RSB_BYTES_PER_BITMAP(rows,cols)/sizeof(rsb_bitmap_data_t));++i)
679 	{
680 		w = bitmap[i];
681 		/* TODO : no stdlib functions for counting bits in integers ? */
682 		//b += (w&1); while(w) b += ((w /= 2)&1);
683 		while(w!=0) {bc += (w&1);w /= 2;}
684 	}
685 	/* warning : overflow check missing */
686 	return bc;
687 }
688 
void* rsb__get_block_address( rsb_blk_idx_t blockrow, rsb_blk_idx_t blockcolumn, const struct rsb_mtx_t *mtxAp)
{
	/*!
	 * \ingroup gr_internals
	 * \param blockrow the block row
	 * \param blockcolumn the block column
	 * \param mtxAp a valid matrix structure pointer
	 * \return a pointer to the block itself or NULL if it is not present
 	 *
	 * A service function for getting the (blockrow,blockcolumn) block address inside the matrix.
	 *
	 * This function is SLOW, and should be used for debugging purposes only !
	 * ( it uses indirect indexing to catch elements )
	 *
	 * NOTE(review): the linear scans below assume the requested block exists
	 * within its (non-empty) block row/column; if it does not, the while
	 * loop runs past the row's entries — acceptable only as a debug helper.
	 * */
	rsb_nnz_idx_t l = 0;	/* offset of the block within its block row/column */
	rsb_nnz_idx_t fnze = 0;	/* index of the row/column's first entry in bindx */
#if RSB_WANT_BITMAP
	struct rsb_options_t *o = NULL;
#endif /* RSB_WANT_BITMAP */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_nnz_idx_t offset = 0;

	if(!mtxAp)
	{errval = RSB_ERR_BADARGS;goto err;}

#if RSB_WANT_BITMAP
	o = mtxAp->options;
	if(!o)
	{errval = RSB_ERR_BADARGS;goto err;}
#endif /* RSB_WANT_BITMAP */

	/* guard the +1 accesses below against index overflow */
	if(RSB_BLK_ADD_OVERFLOW(blockrow,RSB_INDEX_OF_SAFE_EXTRA))
	{errval = RSB_ERR_LIMITS;goto err;}

	if(RSB_BLK_ADD_OVERFLOW(blockcolumn,RSB_INDEX_OF_SAFE_EXTRA))
	{errval = RSB_ERR_LIMITS;goto err;}

	/* i is the working block row */
	if( mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER )
	{
		if(mtxAp->bpntr[blockcolumn]==mtxAp->bpntr[blockcolumn+1])
			goto err;/* empty block column */
		fnze = mtxAp->bpntr[blockcolumn];	/* first nonzero entry in bindx */
		while(mtxAp->bindx[fnze+l]!=blockrow)++l;
	}
	else
	{
		if(mtxAp->bpntr[blockrow]==mtxAp->bpntr[blockrow+1])
			goto err;/* empty block row */
		fnze = mtxAp->bpntr[blockrow];	/* first nonzero entry in bindx */
		while(mtxAp->bindx[fnze+l]!=blockcolumn)++l;
	}

	if(RSB_NNZ_ADD_OVERFLOW(fnze,l))
	{errval = RSB_ERR_LIMITS;goto err;}

	offset = fnze+l;
	//return ((rsb_byte_t*)(mtxAp->VA)) + mtxAp->indptr[offset] * mtxAp->el_size;
	return RSB_BLOCK_ADDRESS(mtxAp,offset);
err:
	rsb__do_perror(NULL,errval);
	return NULL;
}
752 
rsb_err_t rsb__recheck_insertion(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, const struct rsb_mtx_t *mtxAp, const struct rsb_options_t *o)
{
	/*!
	 * \ingroup gr_internals
	 * This is a very slow debug function.
	 * It should be supplied with some sparse matrix construction arrays in any order, and
	 * a fully constructed matrix structure; it then verifies that every
	 * nonzero landed in the right block with the right value.
	 *
	 * \note: Does not support block column major matrices and sorted ones.
	 *
	 * TODO  : should support more matrix formats ( e.g.: block column major )
	 * FIXME : obsolete, very limited function
	 *
	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
	 * */
	register rsb_blk_idx_t i,j;	/* block row / block column of the current nonzero */
	rsb_nnz_idx_t k;
	rsb_nnz_idx_t missing = 0;	/* count of nonzeroes whose block bit is unset */
	const rsb_byte_t *moff = NULL;	/* address of the value inside the matrix */
	const rsb_byte_t *src = NULL;	/* address of the value in the input VA array */

	if(!mtxAp || !o )return RSB_ERR_BADARGS;
	if(mtxAp->flags & RSB_FLAG_SORT_INPUT) return RSB_ERR_BADARGS;
	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER) return RSB_ERR_UNIMPLEMENTED_YET;
	if(! o->bitmap)  return RSB_ERR_UNSUPPORTED_OPERATION;/* when building sorted matrices, we don't create bitmaps .. should we ? */

#if RSB_WANT_BITMAP
	/* clear then re-set the per-block bits for all input nonzeroes */
	for(k=0;k<nnz;++k) { rsb_coo_idx_t iI = IA[k],iJ = JA[k];RSB_BLOCK_UNSET_BIT_FOR_NNZ(&iI,&iJ,k,mtxAp); }
	for(k=0;k<nnz;++k) { RSB_BLOCK_SET_BIT_FOR_NNZ(  IA,JA,k,mtxAp); }
#endif /* RSB_WANT_BITMAP */

	/* pass 1: every input nonzero must have its block marked in the bitmap */
	for(k=0;k<nnz;++k)
	{
		i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,mtxAp);
		j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,mtxAp);
		if(!(RSB_BITMAP_GET(o->bitmap,mtxAp->M_b,mtxAp->K_b,i,j))) ++missing;
	}

	if(!missing)
		RSB_STDERR("checking structure : there are no blocks missing.\n");
	else
		RSB_STDERR("checking structure : there are %zd blocks missing\n",(size_t)missing);
	if(missing) return RSB_ERR_GENERIC_ERROR;
	/* pass 2: every stored value must match the corresponding input value */
	for(k=0;k<nnz;++k)
	{
		i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,mtxAp);
		j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,mtxAp);
		moff = rsb__get_block_address(i,j,mtxAp);
		if(!moff)
		{
			RSB_STDERR("critical block error on block (%d,%d).\n",i,j);
			return RSB_ERR_GENERIC_ERROR;
		}
		else
		{
			moff += RSB_GET_INTRA_BLOCK_OFFSET(IA[k],JA[k],i,j,mtxAp);
			src = VA;
			src += mtxAp->el_size*k;

			if(RSB_MEMCMP(src,moff,mtxAp->el_size))
			{
				/* may give problems when flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER */
				RSB_ERROR("critical error: %d'th nonzero (%d,%d) at (%d,%d) in block (%d,%d) is wrong!\n",
				k,
				IA[k]+1,JA[k]+1,
				RSB_INTRA_BLOCK_ROW(IA[k],i,mtxAp),RSB_INTRA_BLOCK_COLUMN(JA[k],j,mtxAp),i,j);
				/* warning : the following instruction is potentially harmful ! */
#ifdef RSB_DEBUG_BLOCK_STUFF
				RSB_STDERR("should be : 0x%x\n",*(int*)src );
				RSB_STDERR("is : 0x%x\n",*((int*)(moff)));
/*				RSB_STDERR("should be : %g\n",src );
				RSB_STDERR("is : %g\n",*((float*)(moff)));*/
#endif /* RSB_DEBUG_BLOCK_STUFF */
				return RSB_ERR_GENERIC_ERROR;
			}
		}
	}
	return RSB_ERR_NO_ERROR;
}
832 
rsb__do_is_valid_pinfo_t(const struct rsb_mtx_partitioning_info_t * pinfop)833 rsb_err_t rsb__do_is_valid_pinfo_t(const struct rsb_mtx_partitioning_info_t * pinfop)
834 {
835 	/*!
836 	 * \ingroup gr_internals
837 	 * \param pinfop should specify a partitioning info array
838 	 *
839 	 * This is a strictly debugging function, whose sole purpose is to verify
840 	 * the partitioning arrays contents of a rsb_mtx_partitioning_info_t structure.
841 	 * */
842 	rsb_nnz_idx_t k;
843 	if(pinfop->nr<1)
844 	{
845 		RSB_PERR_GOTO(err,"m == %d ?\n",pinfop->nr);
846 	}
847 	if(pinfop->nc<1)
848 	{
849 		RSB_PERR_GOTO(err,"k == %d ?\n",pinfop->nc);
850 	}
851 	if(pinfop->rpntr && pinfop->M_b<1)
852 	{
853 		RSB_PERR_GOTO(err,"M_b == %d ?\n",pinfop->M_b-1);
854 	}
855 	if(pinfop->cpntr && pinfop->K_b<1)
856 	{
857 		RSB_PERR_GOTO(err,"K_b == %d ?\n",pinfop->K_b-1);
858 	}
859 
860 	if(pinfop->rpntr && pinfop->cpntr )
861 	{
862 		/* FIXME */
863 	if(pinfop->rpntr[pinfop->M_b]<=pinfop->rpntr[pinfop->M_b-1])
864 	{
865 		RSB_PERR_GOTO(err,"last (%d) rpntr element is %d <= %d\n",pinfop->M_b,pinfop->rpntr[pinfop->M_b],pinfop->rpntr[pinfop->M_b-1]);
866 	}
867 	if(pinfop->cpntr[pinfop->K_b]<=pinfop->cpntr[pinfop->K_b-1])
868 	{
869 		RSB_PERR_GOTO(err,"last (%d) cpntr element is %d <= %d\n",pinfop->K_b,pinfop->cpntr[pinfop->K_b],pinfop->cpntr[pinfop->K_b-1]);
870 	}
871 
872 	for(k=0;k<pinfop->M_b;++k)if(pinfop->rpntr[k]<0)
873 	{
874 		RSB_PERR_GOTO(err,"bad partitioning : rpntr[%d]=%d\n",k,pinfop->rpntr[k]);
875 	}
876 	for(k=0;k<pinfop->K_b;++k)if(pinfop->cpntr[k]<0)
877 	{
878 		RSB_PERR_GOTO(err,"bad partitioning : cpntr[%d]=%d\n",k,pinfop->cpntr[k]);
879 	}
880 	for(k=0;k<pinfop->M_b;++k)if(pinfop->rpntr[k]>pinfop->nr)
881 	{
882 		RSB_PERR_GOTO(err,"bad partitioning : rpntr[%d]=%d > m==%d\n",k,pinfop->rpntr[k],pinfop->nr);
883 	}
884 	for(k=0;k<pinfop->K_b;++k)if(pinfop->cpntr[k]>pinfop->nc)
885 	{
886 		RSB_PERR_GOTO(err,"bad partitioning : cpntr[%d]=%d > k==%d\n",k,pinfop->cpntr[k],pinfop->nc);
887 	}
888 	}
889 	return RSB_ERR_NO_ERROR;
890 err:
891 	return RSB_ERR_GENERIC_ERROR;
892 }
893 
rsb__compute_partial_fillin_for_nnz_fraction(const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,struct rsb_mtx_partitioning_info_t * pinfop,size_t * element_countp,size_t * block_countp)894 rsb_err_t rsb__compute_partial_fillin_for_nnz_fraction(const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA,const  rsb_nnz_idx_t nnz, struct rsb_mtx_partitioning_info_t * pinfop, size_t * element_countp, size_t * block_countp)
895 {
896 	/*!
897 	 * \ingroup gr_internals
898 	 * see rsb__compute_partial_fillin_for_nnz_fractions
899 	 * */
900 	return rsb__compute_partial_fillin_for_nnz_fractions(IA,JA,&nnz,1,pinfop,element_countp,block_countp);
901 }
902 
rsb__compute_partial_fillin_for_nnz_fractions(const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t * nnz,const rsb_nnz_idx_t nnzn,struct rsb_mtx_partitioning_info_t * pinfop,size_t * element_countp,size_t * block_countp)903 rsb_err_t rsb__compute_partial_fillin_for_nnz_fractions(const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA,const  rsb_nnz_idx_t * nnz, const rsb_nnz_idx_t nnzn, struct rsb_mtx_partitioning_info_t * pinfop, size_t * element_countp, size_t * block_countp)
904 {
905 	/*!
906 	 * \ingroup gr_internals
907 	 * \param IA is a row indices array sized nnz
908 	 * \param JA is a column indices array sized nnz
909 	 * \param nnz is the length of IA and JA
910 	 * \param element_countp is where the element counts will be written
911 	 * \param block_countp   is where the block   counts will be written
912 	 *
913 	 * Will estimate fillin for the first nnz[ni]<nnz[ni+1]<...<nnz[nnzn-1] elements, with nnz[nnzn-1] being
914 	 * less than or equal to the number of elements in the IA, JA, element_countp, block_countp arrays.
915 	 *
916 	 * Note: this function performs almost no data validation.
917 	 * Note : this is not a service but an experimental function, and is very slow.
918 	 * Note : another wayof structuring thisfunction would beto make it accept a 2*nnzn sized array with lower
919 	 *        and upper segment indices both specified.
920 	 *        This would have been more flexible but would require some change in this function code.
921 	 * TODO : this is not core functionality, so this function could be moved elsewhere
922 	 * */
923 
924 	rsb_bitmap_data_t * bitmap = NULL;
925 	size_t  element_count = 0;
926 	rsb_nnz_idx_t block_count = 0;
927 	rsb_nnz_idx_t k = 0,l = 0;/* were -1 */
928 	rsb_blk_idx_t i = 0,j = 0;/* were -1 */
929 
930 	if( !IA || !JA || !pinfop || !element_countp || !block_countp ) goto err;
931 	if( nnzn < 1 ) goto err;
932 	if( RSB_INVALID_BLK_INDEX(pinfop->M_b) || RSB_INVALID_BLK_INDEX(pinfop->K_b) )goto err;
933 	if( !pinfop->rpntr  || !pinfop->cpntr  )goto err;
934 	bitmap = rsb__allocate_bitmap(pinfop->M_b,pinfop->K_b);
935 	if(!bitmap)goto err;
936 
937 	#ifdef  RSB_DEBUG_INPUT
938 	if(rsb__do_is_valid_pinfo_t(pinfop))
939 	{
940 		RSB_PERR_GOTO(err,RSB_ERRM_ES);
941 	}
942 	#endif /* RSB_DEBUG_INPUT */
943 
944 	if(1)
945 	{
946 	size_t skip = 0,skipu = 0;//new
947 	rsb_nnz_idx_t c = 0;
948 	rsb_nnz_idx_t sl;
949 
950 	skip = 0;
951 
952 	skipu = (nnz[nnzn-1]/(nnzn*nnzn));
953 	skip = nnzn*skipu;
954 	for(sl=0;sl<nnzn;++sl)
955 		block_countp[sl] = element_countp[sl] = 0;
956 
957 	#if 1
958 	/* An alternative way, much more stable!
959 	 * However, it underestimates the nnz count so it should in some manner mitigated! */
960 	for(l=0;l<nnzn;++l)
961 	{
962 		for(sl=0;sl<nnzn;++sl)
963 		{
964 		//RSB_INFO("#i: %d / %d  (%d)\n",sl*skip+l*skipu,sl*skip+(l+1)*skipu,nnzn);
965 		for(k=sl*skip+l*skipu;k<sl*skip+(l+1)*skipu;++k)
966 		{
967 
968 			++c;
969 			/* NOTE : the following ifelse statements are for situations where m<br or k<bc  */
970 			if(pinfop->M_b>1)
971 				i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,pinfop);
972 			else
973 			       	i = 0;
974 			if(pinfop->K_b>1)
975 				j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,pinfop);
976 			else
977 			       	j = 0;
978 
979 			/* if the bit is not already set */
980 			if(!(RSB_BITMAP_GET(bitmap,pinfop->M_b,pinfop->K_b,i,j)))
981 			{
982 				element_count += GET_BLOCK_SIZE(i,j,pinfop);
983 				(block_count)++;
984 				RSB_BITMAP_SET(bitmap,pinfop->M_b,pinfop->K_b,i,j) ;
985 			}
986 		}
987 		block_countp[l] = block_count;
988 		element_countp[l] = element_count;
989 		}
990 	}
991 	l = nnzn-1;
992 	//RSB_INFO("#c: %d / %d (%d)..  %d -> %d\n",c,block_countp[l],nnzn,nnzn*skip,nnz[l]);
993 	for(k=c;k<nnz[l];++k)
994 	//for(k=nnzn*skip;k<nnz[l];++k)
995 	{
996 //		++c;
997 
998 			i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,pinfop);
999 			j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,pinfop);
1000 
1001 			/* if the bit is not already set */
1002 			if(!(RSB_BITMAP_GET(bitmap,pinfop->M_b,pinfop->K_b,i,j)))
1003 			{
1004 				element_countp[l] += GET_BLOCK_SIZE(i,j,pinfop);
1005 				block_countp[l]++;
1006 				RSB_BITMAP_SET(bitmap,pinfop->M_b,pinfop->K_b,i,j) ;
1007 			}
1008 	}
1009 	//RSB_INFO("#c: %d / %d (%d)\n",c,block_countp[l],nnzn);
1010 	#endif
1011 	}
1012 	else
1013 	for(l=0;l<nnzn;++l)
1014 	{
1015 		rsb_nnz_idx_t li;
1016 		if(l==0)
1017 			li = 0;
1018 	       	else
1019 		       	li = nnz[l-1];/* will the first loop optimized by the compiler ? :) */
1020 
1021 		for(k=li;k<nnz[l];++k)
1022 		{
1023 
1024 			i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,pinfop);
1025 			j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,pinfop);
1026 
1027 			/* if the bit is not already set */
1028 			if(!(RSB_BITMAP_GET(bitmap,pinfop->M_b,pinfop->K_b,i,j)))
1029 			{
1030 				element_count += GET_BLOCK_SIZE(i,j,pinfop);
1031 				(block_count)++;
1032 				RSB_BITMAP_SET(bitmap,pinfop->M_b,pinfop->K_b,i,j) ;
1033 			}
1034 		}
1035 		if(block_countp)
1036 			block_countp[l] = block_count;
1037 		if(element_countp)
1038 			element_countp[l] = element_count;
1039 	}
1040 
1041 	RSB_CONDITIONAL_FREE(bitmap);
1042 	return RSB_ERR_NO_ERROR;
1043 err:
1044 	return RSB_ERR_ENOMEM;
1045 }
1046 
#if RSB_WANT_BITMAP
static rsb_err_t rsb_element_block_count_and_bitmap_from_coo_partitioning(const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop, size_t * element_countp, rsb_nnz_idx_t * block_countp, rsb_bitmap_data_t ** bitmapp, const rsb_flags_t flags, struct rsb_mtx_t * mtxAp)
{
	/*!
	 * \ingroup gr_internals
	 * \param IA is a row indices array sized nnz
	 * \param JA is a column indices array sized nnz
	 * \param nnz is the length of IA and JA
	 * \param pinfop should specify a partitioning info array
	 * \param element_countp is where the element counts will be written
	 * \param block_countp   is where the block   counts will be written
	 *
	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
	 *
	 * WARNING : IA and JA can respectively point to columns and rows arrays instead of rows and columns,
	 * as long as the pinfop information is swapped accordingly.
	 * In this way a transposed bitmap will be allocated (note that its size could be the same. guess why..).
	 *
	 * In case of error, no bitmap will be allocated, but its pointer may be overwritten.
	 * In case of success, a bitmap structure will be allocated.
	 * */

	rsb_nnz_idx_t k = 0;
	rsb_bitmap_data_t * bitmap = NULL;
	size_t element_count = 0;
	rsb_nnz_idx_t block_count = 0;
	rsb_blk_idx_t mI = 0,MI = 0;
	const rsb_coo_idx_t * mIndx = NULL,* MIndx = NULL;

	/* fix: validate all arguments (mtxAp included) BEFORE any dereference;
	 * previously mtxAp->flags was read without a null check */
	if( !mtxAp || !IA || !JA || !pinfop || !element_countp || !bitmapp || !block_countp ) goto err;
	if( RSB_INVALID_NNZ_INDEX(nnz) ) goto err;
	if( RSB_INVALID_BLK_INDEX(pinfop->M_b) || RSB_INVALID_BLK_INDEX(pinfop->K_b) )goto err;
	if( !pinfop->rpntr  || !pinfop->cpntr  )goto err;

	/* select major/minor index arrays according to the storage orientation */
	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
	{
		mIndx = IA;
		MIndx = JA;
	}
	else
	{
		mIndx = JA;
		MIndx = IA;
	}

	bitmap = rsb__allocate_bitmap(mtxAp->Mdim,mtxAp->mdim);

	if(!bitmap)goto err;

	if( mtxAp->flags & RSB_FLAG_SHOULD_DEBUG )
		if(rsb__do_is_valid_pinfo_t(pinfop))
		{
			RSB_PERR_GOTO(err,RSB_ERRM_ES);
		}

	if(RSB_WANT_VERBOSE_MESSAGES)
		RSB_INFO("counting matrix blocks ..\n");

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		/*
		 * We count the amount of elements for each block, setting bits in
		 * our bitmap where a block should be placed, and leaving unset bits
		 * which correspond to zero blocks
		 * */

		MI = RSB_GET_BLOCK_MAJ_FOR_NZ(MIndx+k,mtxAp);
		mI = RSB_GET_BLOCK_MIN_FOR_NZ(mIndx+k,mtxAp);

		RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(MI));
		RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(mI));

		if(mI>=mtxAp->mdim)
		{
			RSB_PERR_GOTO(err," j=%d >= o->K_b=%d\n ",mI,mtxAp->mdim);
		}
		if(mI <0 )
		{
			RSB_PERR_GOTO(err," j=%d < 0 \n",mI);
		}
		if(MI>=mtxAp->Mdim)
		{
			RSB_PERR_GOTO(err," i=%d >= o->M_b=%d\n ",MI,mtxAp->Mdim);
		}
		if(MI <0 )
		{
			RSB_PERR_GOTO(err," i=%d < 0 \n",MI);
		}

		/* if the bit is not already set */
		if(!(RSB_BITMAP_GET(bitmap,mtxAp->Mdim,mtxAp->mdim,MI,mI)))
		{
			/* GET_BLOCK_SIZE expects (row,column) order; swap for column-major */
			if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
				element_count += GET_BLOCK_SIZE(mI,MI,pinfop);
			else
				element_count += GET_BLOCK_SIZE(MI,mI,pinfop);

			(block_count)++;
			RSB_BITMAP_SET(bitmap,mtxAp->Mdim,mtxAp->mdim,MI,mI) ;
		}
	}

	/* sanity: distinct blocks cannot outnumber nonzeroes */
	if(block_count > nnz)
	{
		RSB_PERR_GOTO(err,"(mtxAp->block_count=%d >= n=%d)!\n",block_count,nnz);
	}

	*block_countp = block_count;
	*element_countp = element_count;
	*bitmapp = bitmap;

	return RSB_ERR_NO_ERROR;
err:
	RSB_CONDITIONAL_FREE(bitmap);
	return RSB_ERR_GENERIC_ERROR;
}
#endif /* RSB_WANT_BITMAP */
1164 
rsb__do_set_init_storage_flags(struct rsb_mtx_t * mtxAp,rsb_flags_t flags)1165 rsb_err_t rsb__do_set_init_storage_flags(struct rsb_mtx_t *mtxAp, rsb_flags_t flags)
1166 {
1167 	rsb_err_t errval = RSB_ERR_NO_ERROR;
1168 	rsb_flags_t storage_only_flags = RSB_DO_FLAGS_EXTRACT_STORAGE(flags);
1169 
1170 #ifdef RSB_FLAG_WANT_LINKED_STORAGE
1171 	if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_LINKED_STORAGE))
1172 	{
1173 		if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1174 #ifdef RSB_MATRIX_STORAGE_LC
1175 			mtxAp->matrix_storage = RSB_MATRIX_STORAGE_LC;
1176 #els /* RSB_MATRIX_STORAGE_LC */e
1177 			{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1178 #endif /* RSB_MATRIX_STORAGE_LC */
1179 		else
1180 #ifdef RSB_MATRIX_STORAGE_LR
1181 			mtxAp->matrix_storage = RSB_MATRIX_STORAGE_LR;
1182 #else /* RSB_MATRIX_STORAGE_LR */
1183 			{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1184 #endif  /* RSB_MATRIX_STORAGE_LR */
1185 	}
1186 	else
1187 #endif /* RSB_FLAG_WANT_LINKED_STORAGE */
1188 	{
1189 		if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_COO_STORAGE))
1190 		{
1191 			if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1192 #ifdef RSB_MATRIX_STORAGE_BCOC
1193 				mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCOC;
1194 #else /* RSB_MATRIX_STORAGE_BCOC */
1195 				{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1196 #endif /* RSB_MATRIX_STORAGE_BCOC */
1197 			else
1198 #ifdef RSB_MATRIX_STORAGE_BCOR
1199 				mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCOR;
1200 #else /* RSB_MATRIX_STORAGE_BCOR */
1201 				{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1202 #endif /* RSB_MATRIX_STORAGE_BCOR */
1203 		}
1204 		else //FIXME: this switch could cohexist with CSS, and be processed later or be ignored (in the old constructor)
1205 		if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_COO_STORAGE))
1206 		{
1207 			if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1208 #ifdef RSB_MATRIX_STORAGE_BCOC
1209 				mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCOC;
1210 #else /* RSB_MATRIX_STORAGE_BCOC */
1211 				{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1212 #endif /* RSB_MATRIX_STORAGE_BCOC */
1213 			else
1214 #ifdef RSB_MATRIX_STORAGE_BCOR
1215 				mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCOR;
1216 #else /* RSB_MATRIX_STORAGE_BCOR */
1217 				{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1218 #endif /* RSB_MATRIX_STORAGE_BCOR */
1219 		}
1220 		else
1221 		if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_BCSS_STORAGE))
1222 		{
1223 			if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1224 #ifdef RSB_MATRIX_STORAGE_BCSC
1225 				mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCSC;
1226 #else /* RSB_MATRIX_STORAGE_BCSC */
1227 				{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1228 #endif /* RSB_MATRIX_STORAGE_BCSC */
1229 			else
1230 #ifdef RSB_MATRIX_STORAGE_BCSR
1231 				mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCSR;
1232 #else /* RSB_MATRIX_STORAGE_BCSR */
1233 				{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1234 #endif /* RSB_MATRIX_STORAGE_BCSR */
1235 		}
1236 		else
1237 		if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_FIXED_BLOCKING_VBR))
1238 		{
1239 			if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1240 #ifdef RSB_MATRIX_STORAGE_VBC
1241 				mtxAp->matrix_storage = RSB_MATRIX_STORAGE_VBC;
1242 #else /* RSB_MATRIX_STORAGE_VBC */
1243 				{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1244 #endif /* RSB_MATRIX_STORAGE_VBC */
1245 			else
1246 #ifdef RSB_MATRIX_STORAGE_VBR
1247 				mtxAp->matrix_storage = RSB_MATRIX_STORAGE_VBR;
1248 #else /* RSB_MATRIX_STORAGE_VBR */
1249 				{errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1250 #endif /* RSB_MATRIX_STORAGE_VBR */
1251 		}
1252 		else
1253 		{
1254 			/* undetermined format or a merge of formats (happens on recursive matrices during construction) */
1255 			mtxAp->matrix_storage = storage_only_flags;
1256 		}
1257 	}
1258 err:
1259 	RSB_DO_ERR_RETURN(errval)
1260 }
1261 
rsb__set_init_flags_and_stuff(struct rsb_mtx_t * mtxAp,struct rsb_options_t * o,const struct rsb_mtx_partitioning_info_t * pinfop,rsb_coo_idx_t m,rsb_coo_idx_t k,rsb_nnz_idx_t nnz,rsb_nnz_idx_t block_count,rsb_nnz_idx_t element_count,rsb_type_t typecode,rsb_flags_t flags)1262 rsb_err_t rsb__set_init_flags_and_stuff( struct rsb_mtx_t *mtxAp, struct rsb_options_t * o, const struct rsb_mtx_partitioning_info_t * pinfop, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_nnz_idx_t nnz, rsb_nnz_idx_t block_count, rsb_nnz_idx_t element_count, rsb_type_t typecode, rsb_flags_t flags )
1263 {
1264 	/**
1265 	 * \ingroup gr_internals
1266 	 *
1267 	 * This inner service function sets some flags and variables during matrix construction.
1268 	 *
1269 	 * */
1270 	rsb_err_t errval = RSB_ERR_NO_ERROR;
1271 
1272 	if(!mtxAp /*|| !o */ /* FIXME: o disabled lately */
1273 #if !RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
1274 	 || !pinfop
1275 #endif /* RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS  */
1276 	)
1277 	{
1278 		errval = RSB_ERR_BADARGS;
1279 		RSB_PERR_GOTO(err,RSB_ERRM_ES);
1280 	}
1281 
1282 	mtxAp->flags = flags;
1283 	mtxAp->typecode =typecode;
1284 	mtxAp->el_size = RSB_NUMERICAL_TYPE_SIZE(typecode);
1285 #if RSB_WANT_BITMAP
1286 	mtxAp->options = NULL;	// we ignore o
1287 //	mtxAp->options = o;
1288 #endif /* RSB_WANT_BITMAP */
1289 	mtxAp->nnz = nnz;
1290 	mtxAp->element_count = element_count;
1291 	mtxAp->block_count = block_count;
1292 
1293 	if(pinfop)
1294 	{
1295 		mtxAp->M_b = pinfop->M_b;
1296 		mtxAp->K_b = pinfop->K_b;
1297 		mtxAp->nr = pinfop->nr;
1298 		mtxAp->nc = pinfop->nc;
1299 		mtxAp->rpntr = pinfop->rpntr;
1300 		mtxAp->cpntr = pinfop->cpntr;
1301 //#if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
1302 		mtxAp->br = pinfop->br;
1303 		mtxAp->bc = pinfop->bc;
1304 //#endif /* RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR */
1305 	}
1306 	else
1307 	{
1308 		mtxAp->M_b = m;
1309 		mtxAp->K_b = k;
1310 		mtxAp->nr = m;
1311 		mtxAp->nc = k;
1312 		mtxAp->rpntr = NULL;
1313 		mtxAp->cpntr = NULL;
1314 //#if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
1315 		mtxAp->br = 1;
1316 		mtxAp->bc = 1;
1317 //#endif /* RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR */
1318 		RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(mtxAp->br+1));
1319 		RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(mtxAp->bc+1));
1320 	}
1321 	if(RSB_IS_INVALID_TYPE_SIZE(mtxAp->el_size = rsb__do_sizeof(mtxAp->typecode)))
1322 	{
1323 		errval = RSB_ERR_INTERNAL_ERROR;
1324 		RSB_PERR_GOTO(err,RSB_ERRM_ES);
1325 	}
1326 
1327 	if((errval = rsb__do_set_init_storage_flags(mtxAp,flags))!=RSB_ERR_NO_ERROR)
1328 	{
1329 		RSB_PERR_GOTO(err,RSB_ERRM_ES);
1330 	}
1331 
1332 	if(mtxAp->br==1 && mtxAp->bc==1)
1333 	{
1334 		mtxAp->M_b = mtxAp->nr;
1335 		mtxAp->K_b = mtxAp->nc;
1336 	}
1337 
1338 	/* setting aliases */
1339 	if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1340 	{
1341 		mtxAp->Mdim = mtxAp->K_b;
1342 		mtxAp->mdim = mtxAp->M_b;
1343 		mtxAp->mpntr = mtxAp->rpntr;
1344 		mtxAp->Mpntr = mtxAp->cpntr;
1345 	}
1346 	else
1347 	{
1348 		mtxAp->Mdim = mtxAp->M_b;
1349 		mtxAp->mdim = mtxAp->K_b;
1350 		mtxAp->Mpntr = mtxAp->rpntr;
1351 		mtxAp->mpntr = mtxAp->cpntr;
1352 	}
1353 
1354 //	RSB_DEBUG_ASSERT(mtxAp->Mdim);
1355 //	RSB_DEBUG_ASSERT(mtxAp->mdim);
1356 //	RSB_DEBUG_ASSERT(mtxAp->rpntr);
1357 //	RSB_DEBUG_ASSERT(mtxAp->cpntr);
1358 	RSB_DEBUG_ASSERT(mtxAp->el_size);
1359 
1360 	return RSB_ERR_NO_ERROR;
1361 err:
1362 	RSB_DO_ERR_RETURN(errval)
1363 }
1364 
#if 0
/* NOTE(review): this whole function is compiled out (#if 0); kept for reference only. */
rsb_err_t rsb_dump_matrix ( const struct rsb_mtx_t *mtxAp )
{
	/*!
	 * \ingroup gr_internals
	 *
	 *  \param mtxAp is a valid matrix structure pointer
	 *  \param diagonal is an array sized as min(mtxAp->nr,mtxAp->nc) which on exit will contain the diagonal elements.
	 *  \return -1 in case of error, 0 otherwise
	 *
	 * Dumps matrix dimensions and per-block coordinates/values (assumes double-typed values).
	 * FIXME : UNTESTED AND UNDOCUMENTED AND UNFINISHED
	 * FIXME : USE rsb_print_matrix and delete this ?
	 * */
	register rsb_nnz_idx_t baserow,basecolumn;
	register rsb_blk_idx_t rows,columns;
	register rsb_blk_idx_t blockrow,blockcolumn;
	register rsb_byte_t *bp;

	RSB_INFO("%% [!] TESTING CODE !\n");
	RSB_INFO("%%rows:%d columns:%d blocks:%d\n",mtxAp->nr,mtxAp->nc,mtxAp->block_count);
	RSB_INFO("%d %d %d\n", mtxAp->nr,mtxAp->nc,mtxAp->nnz);
	/* iterate over all stored blocks via the block-cursor macros */
	RSB_GET_FIRST_BLOCK_POINTER(bp,mtxAp,baserow,basecolumn,rows,columns,blockrow,blockcolumn);
	while(!RSB_GOT_LAST_BLOCK_POINTER(mtxAp))
	{
		rsb_coo_idx_t r,c;
		/*
		 * FIXME
		 * */
//		RSB_INFO("%x \n", bp) ;
//		RSB_INFO("_k : %d %d   ", _k,_lastk) ;
//		RSB_INFO("%d %d ", baserow,basecolumn) ;
		RSB_INFO("%d %d\n", rows,columns) ;
#if 1
		/* print every element of the current dense block; bp is read as double */
		for(r=0;r<rows;++r)
		for(c=0;c<columns;++c)
		{
				RSB_INFO("%d %d %lg\n", baserow+r,basecolumn+c,((double*)bp)[columns*r+c]) ;
		}
#endif
		RSB_GET_NEXT_BLOCK_POINTER(bp,mtxAp,baserow,basecolumn,rows,columns,blockrow,blockcolumn);
	}

	return RSB_ERR_NO_ERROR;
}
#endif
1410 
rsb__do_insert_sorted(struct rsb_mtx_t * mtxAp,const void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,const struct rsb_mtx_partitioning_info_t * pinfop)1411 rsb_err_t rsb__do_insert_sorted( struct rsb_mtx_t * mtxAp, const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop)
1412 {
1413 	/*!
1414 	 * \ingroup gr_internals
1415 	 * Inserts in the matrix structures the specified coo elements, sorted accordingly to the specified BCSR blocking.
1416 	 */
1417 
1418 	rsb_coo_idx_t blockrows = 0;
1419 	rsb_coo_idx_t blockcolumns = 0;
1420 	rsb_coo_idx_t baserow = 0;
1421 	rsb_coo_idx_t basecolumn = 0;
1422 	rsb_nnz_idx_t *indptr = mtxAp->indptr;
1423 	const rsb_coo_idx_t *Mpntr = NULL;
1424 	const rsb_coo_idx_t *mpntr = NULL;
1425 	const rsb_coo_idx_t *MIndx = NULL;
1426 	const rsb_coo_idx_t *mIndx = NULL;
1427 	rsb_blk_idx_t mI = 0,MI = 0;
1428 	rsb_err_t errval = RSB_ERR_NO_ERROR;
1429 	rsb_nnz_idx_t k = 0;	/* will index a nnz sized array */
1430 	rsb_nnz_idx_t K = 0;
1431 	rsb_byte_t*dst = NULL;
1432 	rsb_byte_t*src = NULL;
1433 
1434 	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
1435 	{
1436 		mpntr = pinfop->rpntr;
1437 		Mpntr = pinfop->cpntr;
1438 		mIndx = IA;
1439 		MIndx = JA;
1440 	}
1441 	else
1442 	{
1443 		Mpntr = pinfop->rpntr;
1444 		mpntr = pinfop->cpntr;
1445 		MIndx = IA;
1446 		mIndx = JA;
1447 	}
1448 
1449 	k = mI = MI = 0;K = 0;
1450 	blockrows = Mpntr[MI+1] - Mpntr[MI];
1451 	blockcolumns = mpntr[mI+1] - mpntr[mI];
1452 	while( MIndx[k] >= Mpntr[MI+1] )++MI;	/* skipping 'preceding' block rows .. */
1453 	while( mIndx[k] >= mpntr[mI+1] )++mI;	/* skipping 'preceding' block columns .. */
1454 	baserow = Mpntr[MI];
1455 	basecolumn = mpntr[mI];
1456 	mtxAp->bindx [ K ] = mI;			/* a 'new' block */
1457 	indptr[ K+1 ]=indptr[ K  ] + blockrows * blockcolumns;
1458 
1459 #ifdef RSB_FLAG_WANT_LINKED_STORAGE
1460 	if( rsb__have_linked_storage(mtxAp->flags) )
1461 	{
1462 		if(RSB_WANT_VERBOSE_MESSAGES)
1463 			RSB_INFO("initializing linked lists stuff.\n");
1464 		if(RSB_UNLIKELY(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1465 	RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),mI,MI,blockcolumns,blockrows,basecolumn,baserow)
1466 		else
1467 	RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),MI,mI,blockrows,blockcolumns,baserow,basecolumn)
1468 	}
1469 #endif /* RSB_FLAG_WANT_LINKED_STORAGE */
1470 
1471 /*
1472 	dst = mtxAp->VA;
1473 	dst += RSB_BLOCK_OFFSET(mtxAp,K);
1474 	{rsb_blk_idx_t ibo = 0;
1475 	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
1476 		ibo = RSB_GET_INTRA_BLOCK_OFFSET(mIndx[k],MIndx[k],mI,MI,mtxAp) ;
1477 	else
1478 		ibo = RSB_GET_INTRA_BLOCK_OFFSET(MIndx[k],mIndx[k],MI,mI,mtxAp) ;
1479 	dst += ibo;}
1480 	src = ((rsb_byte_t*)VA) + mtxAp->el_size * k;
1481 	RSB_NUMERICAL_TYPE_SET_ELEMENT(dst,src,mtxAp->typecode);*/
1482 
1483 	while(RSB_LIKELY(k<nnz))
1484 	{
1485 #ifdef DEBUG
1486 		if( MIndx[k] < baserow  )
1487 		{
1488 			RSB_ERROR("k=%d : (%d %d) is not ok\n",k, MIndx[k]+1,mIndx[k]+1);
1489 			RSB_STDERR("(minor dim. index %d < base row %d)\n",MIndx[k] , baserow);
1490 			errval = RSB_ERR_INTERNAL_ERROR;
1491 			goto err;/* NOTE : this jump could be evil */
1492 		}
1493 #endif /* DEBUG */
1494 
1495 		if( mIndx[k] >= basecolumn+blockcolumns  )
1496 		{
1497 			/* new block column, for sure */
1498 			while( mIndx[k] >= mpntr[mI+1] )++mI;
1499 			blockcolumns = mpntr[mI+1] - mpntr[mI];
1500 			basecolumn = mpntr[mI];
1501 
1502 			if( MIndx[k] >= baserow+blockrows  )
1503 			{
1504 				/* new block row  */
1505 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
1506 				blockrows = Mpntr[MI+1] - Mpntr[MI];
1507 				baserow = Mpntr[MI];
1508 			}
1509 			else
1510 			{
1511 				/* same block row  */
1512 			}
1513 			++K;
1514 			mtxAp->bindx [ K ] = mI;			/* a 'new' block */
1515 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
1516 #ifdef RSB_FLAG_WANT_LINKED_STORAGE
1517 			if( rsb__have_linked_storage(mtxAp->flags) )
1518 			{
1519 				if(RSB_UNLIKELY(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1520 	RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),mI,MI,blockcolumns,blockrows,basecolumn,baserow)
1521 				else
1522 	RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),MI,mI,blockrows,blockcolumns,baserow,basecolumn)
1523 			}
1524 #endif /* RSB_FLAG_WANT_LINKED_STORAGE */
1525 		}
1526 		else
1527 		if( MIndx[k] >= baserow+blockrows  )
1528 		{
1529 			/* new row block, for sure */
1530 			while( MIndx[k] >= Mpntr[MI+1] )++MI;
1531 			blockrows = Mpntr[MI+1] - Mpntr[MI];
1532 			baserow = Mpntr[MI];
1533 
1534 			if( mIndx[k] < basecolumn  )
1535 			{
1536 				/* new row block, new block column  */
1537 				mI = 0;
1538 				while( mIndx[k] >= mpntr[mI+1] )++mI;
1539 				blockcolumns = mpntr[mI+1] - mpntr[mI];
1540 				basecolumn = mpntr[mI];
1541 			}
1542 			else
1543 			{
1544 				/* new row block, same column  */
1545 			}
1546 			++K;
1547 			mtxAp->bindx [ K ] = mI;			/* a 'new' block */
1548 			indptr[ K+1 ] = indptr[ K  ] + blockrows * blockcolumns;
1549 #ifdef RSB_FLAG_WANT_LINKED_STORAGE
1550 			if( rsb__have_linked_storage(mtxAp->flags) )
1551 			{
1552 				if(RSB_UNLIKELY(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1553 	RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),mI,MI,blockcolumns,blockrows,basecolumn,baserow)
1554 				else
1555 	RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),MI,mI,blockrows,blockcolumns,baserow,basecolumn)
1556 			}
1557 #endif /* RSB_FLAG_WANT_LINKED_STORAGE */
1558 		}
1559 		else
1560 		{
1561 			/* same block row for sure */
1562 		}
1563 		dst = mtxAp->VA;
1564 		dst += RSB_BLOCK_OFFSET(mtxAp,K);
1565 		{
1566 		rsb_nnz_idx_t ibo = 0;
1567 		if(RSB_UNLIKELY(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1568 			ibo = RSB_GET_INTRA_BLOCK_OFFSET(mIndx[k],MIndx[k],mI,MI,mtxAp) ;
1569 		else
1570 			ibo = RSB_GET_INTRA_BLOCK_OFFSET(MIndx[k],mIndx[k],MI,mI,mtxAp) ;
1571 		dst += ibo;
1572 		}
1573 		//RSB_ERROR("%d %d %d\n",((rsb_byte_t*)dst)-((rsb_byte_t*)mtxAp->VA),MIndx[k],mIndx[k]);
1574 		src = ((rsb_byte_t*)VA) + mtxAp->el_size * k;
1575 		RSB_NUMERICAL_TYPE_SET_ELEMENT(dst,src,mtxAp->typecode);
1576 		++k;
1577 	}
1578 
1579 	if(nnz)++K;
1580 	mtxAp->bindx[K] = 0;	// the first element off the 'working' bindx should be set to a safe value
1581 
1582 	if(mtxAp->flags & RSB_FLAG_SHOULD_DEBUG)
1583 	if( K != mtxAp->block_count )
1584 	{
1585 		RSB_ERROR("K is %zd ! should be %zd (block count)!\n",(size_t)K,(size_t)mtxAp->block_count);
1586 		RSB_STDERR("nnz : %zd\n",(size_t)nnz);
1587 		RSB_STDERR("k : %zd\n",(size_t)k);
1588 		errval = RSB_ERR_INTERNAL_ERROR;
1589 		goto err;
1590 	}
1591 err:
1592 	RSB_DO_ERR_RETURN(errval)
1593 }
1594 
rsb__do_account_sorted(struct rsb_mtx_t * mtxAp,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,const struct rsb_mtx_partitioning_info_t * pinfop,rsb_nnz_idx_t * elements_per_block_row,rsb_nnz_idx_t * blocks_per_block_row)1595 rsb_err_t rsb__do_account_sorted( struct rsb_mtx_t * mtxAp, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop, rsb_nnz_idx_t * elements_per_block_row, rsb_nnz_idx_t * blocks_per_block_row)
1596 {
1597 	/*!
1598 	 * \ingroup gr_internals
1599 	 *
1600 	 * The routine for inserting BCSR sorted coo elements in a fresh matrix.
1601 	 * It is not optimized, and it should be used as a debug resort when tuning optimized ones.
1602 	 *
1603 	 * FIXME : this code is deprecated in favour of rsb__do_account_sorted_optimized
1604 	 * FIXME : does not support lots of flags!
1605 	 */
1606 	rsb_coo_idx_t blockrows = 0;
1607 	rsb_coo_idx_t blockcolumns = 0;
1608 	rsb_coo_idx_t baserow = 0;
1609 	rsb_coo_idx_t basecolumn = 0;
1610 	const rsb_coo_idx_t *Mpntr = NULL;
1611 	const rsb_coo_idx_t *mpntr = NULL;
1612 	const rsb_coo_idx_t *MIndx = NULL;
1613 	const rsb_coo_idx_t *mIndx = NULL;
1614 	rsb_blk_idx_t mI = 0,MI = 0;
1615 	rsb_err_t errval = RSB_ERR_NO_ERROR;
1616 	rsb_nnz_idx_t k = 0;	/* will index a nnz sized array */
1617 	rsb_nnz_idx_t K = 0;
1618 	k = mI = MI = K=0;
1619 
1620 	if( ! IA || ! JA
1621 #if !RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
1622 	|| !pinfop
1623 #endif /* RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS */
1624 	)
1625 	{
1626 		errval = RSB_ERR_BADARGS;goto err;
1627 	}
1628 
1629 	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
1630 	{
1631 		mpntr = pinfop->rpntr;
1632 		Mpntr = pinfop->cpntr;
1633 		mIndx = IA;
1634 		MIndx = JA;
1635 	}
1636 	else
1637 	{
1638 		Mpntr = pinfop->rpntr;
1639 		mpntr = pinfop->cpntr;
1640 		MIndx = IA;
1641 		mIndx = JA;
1642 	}
1643 
1644 	while( MIndx[k] >= Mpntr[MI+1] )++MI;	/* skipping 'preceding' block rows .. */
1645 	while( mIndx[k] >= mpntr[mI+1] )++mI;	/* skipping 'preceding' block columns .. */
1646 	blockrows = Mpntr[MI+1] - Mpntr[MI];
1647 	blockcolumns = mpntr[mI+1] - mpntr[mI];
1648 	baserow = Mpntr[MI];
1649 	basecolumn = mpntr[mI];
1650 	elements_per_block_row[MI*0] += blockrows * blockcolumns;
1651 	blocks_per_block_row[MI] += 1;
1652 
1653 	while(RSB_LIKELY(k<nnz))
1654 	{
1655 #ifdef DEBUG
1656 		if( MIndx[k] < baserow  )
1657 		{
1658 			RSB_ERROR("k=%d : (%d %d) is not ok\n",k, MIndx[k]+1,mIndx[k]+1);
1659 			RSB_STDERR("(minor dim. index %d < base row %d)\n",MIndx[k] , baserow);
1660 			errval = RSB_ERR_INTERNAL_ERROR;
1661 			goto err;
1662 		}
1663 #endif /* DEBUG */
1664 
1665 		if( mIndx[k] >= basecolumn+blockcolumns  )
1666 		{
1667 			/* new block column, for sure */
1668 			while( mIndx[k] >= mpntr[mI+1] )++mI;
1669 			blockcolumns = mpntr[mI+1] - mpntr[mI];
1670 			basecolumn = mpntr[mI];
1671 
1672 			if( MIndx[k] >= baserow+blockrows  )
1673 			{
1674 				/* new block row  */
1675 				while( MIndx[k] >= Mpntr[MI+1] )++MI;
1676 				blockrows = Mpntr[MI+1] - Mpntr[MI];
1677 				baserow = Mpntr[MI];
1678 			}
1679 			else
1680 			{
1681 				/* same block row  */
1682 			}
1683 			elements_per_block_row[MI*0] += blockrows * blockcolumns;
1684 			blocks_per_block_row[MI] += 1;
1685 			++K;
1686 		}
1687 		else
1688 		if( MIndx[k] >= baserow+blockrows  )
1689 		{
1690 			/* new row block, for sure */
1691 			while( MIndx[k] >= Mpntr[MI+1] )++MI;
1692 			blockrows = Mpntr[MI+1] - Mpntr[MI];
1693 			baserow = Mpntr[MI];
1694 
1695 			if( mIndx[k] < basecolumn  )
1696 			{
1697 				/* new row block, new block column  */
1698 				mI = 0;
1699 				while( mIndx[k] >= mpntr[mI+1] )++mI;
1700 				blockcolumns = mpntr[mI+1] - mpntr[mI];
1701 				basecolumn = mpntr[mI];
1702 			}
1703 			else
1704 			{
1705 				/* new row block, same column  */
1706 			}
1707 			/* get rid of this var : elements_per_block_row */
1708 			elements_per_block_row[MI*0] += blockrows * blockcolumns;
1709 			blocks_per_block_row[MI] += 1;
1710 			++K;
1711 		}
1712 		else
1713 		{
1714 			/* same block row for sure */
1715 		}
1716 		++k;
1717 	}
1718 err:
1719 	RSB_DO_ERR_RETURN(errval)
1720 }
1721 
struct rsb_mtx_t * rsb__allocate_css_from_coo_sorted( void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop, rsb_coo_idx_t m, rsb_coo_idx_t k, struct rsb_options_t * o, rsb_type_t typecode, rsb_flags_t flags, rsb_err_t *errvalp)
{
	/*!
	 * \ingroup gr_internals
	 * The routine for matrix building from sorted coo elements. CSR only.
	 * The IA and JA arrays are reused in place as bpntr and bindx
	 * (RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR is required).
	 *
	 * \return the newly built matrix, or NULL on error (with *errvalp set).
	 *
	 * Fixes applied:
	 *  - the result of rsb__do_account_sorted_optimized is now checked
	 *    (it was assigned but ignored);
	 *  - removed a dead store (block_count = 0 immediately overwritten);
	 *  - the error message now names this function, not
	 *    rsb__allocate_from_coo_sorted.
	 *
	 * FIXME : EXPERIMENTAL, UNFINISHED
	 * FIXME : FIX THIS FUNCTION TO ALLOCATE CSR/CSC EVEN IF NOT IN PLACE
	 * */
	rsb_nnz_idx_t * elements_per_block_row = NULL;
	rsb_time_t t = RSB_TIME_ZERO;
	struct rsb_mtx_t *mtxAp = NULL;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_blk_idx_t MI = 0;

	if(!errvalp)
		return NULL;

	if(!( flags & RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR ))
	{
		return NULL;
	}
	if(flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
		return NULL;/* FIXME : only csr now */

	pinfop = NULL;/* FIXME */

	if(!o) {errval = RSB_ERR_BADARGS;goto err;}
	rsb__init_struct(mtxAp = rsb__calloc(sizeof(*mtxAp)));
	if(!mtxAp){errval = RSB_ERR_ENOMEM;goto err;}
	if((errval = rsb__set_init_flags_and_stuff(mtxAp,o,pinfop,m,k,0,0,0,typecode,flags))!=RSB_ERR_NO_ERROR)goto err;

	elements_per_block_row = rsb__calloc(sizeof(rsb_nnz_idx_t)*(1+mtxAp->Mdim));

	if(!elements_per_block_row)
	{
		errval = RSB_ERR_ENOMEM;
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}
	t = - rsb_time();

	RSB_DEBUG_ASSERT(rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,nnz,typecode,pinfop,flags)==RSB_ERR_NO_ERROR);
	RSB_DEBUG_ASSERT(rsb__util_are_valid_coo_arrays(IA,JA,nnz)==RSB_ERR_NO_ERROR);

	errval = rsb__do_account_sorted_optimized(mtxAp,IA,JA,m,k,nnz,NULL,elements_per_block_row,NULL);
	if(RSB_SOME_ERROR(errval))	/* was assigned but never checked */
		goto err;

	mtxAp->block_count = nnz;	/* CSR: one (1x1) block per nonzero */

	t += rsb_time();
	mtxAp->sat = t;	/* sorted-accounting time */

	mtxAp->indptr = rsb__malloc(sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1));
	mtxAp->bindx = JA;	/* ok, done :) FIXME : should type - convert ... */

	if(!mtxAp->bindx ){ errval = RSB_ERR_ENOMEM; goto err;}
	if(!mtxAp->indptr){ errval = RSB_ERR_ENOMEM; goto err;}

	mtxAp->indptr[0] = 0;/* */

	/* IA is recycled as the row pointer array; assumes it can hold Mdim+1
	 * entries (nnz >= Mdim+1) — TODO confirm with callers */
	mtxAp->bpntr = IA;
	mtxAp->bpntr [0] = 0;
	for(MI=0;MI<mtxAp->Mdim;++MI) mtxAp->bpntr[MI+1]= mtxAp->bpntr[MI]+ elements_per_block_row[MI];

#if RSB_WANT_BITMAP
	mtxAp->options = o ;
#endif /* RSB_WANT_BITMAP */
	mtxAp->nnz = nnz;
	mtxAp->element_count = nnz;
	mtxAp->VA = VA;
	t = - rsb_time();
	errval = rsb__do_insert_sorted_optimized(mtxAp,VA,IA,JA,nnz,NULL);
	if(RSB_SOME_ERROR(errval))
		goto err;
	t += rsb_time();
	mtxAp->eit = t;	/* element-insertion time */
	RSB_CONDITIONAL_FREE(elements_per_block_row);
	return mtxAp;
err:
	RSB_STDERR("rsb__allocate_css_from_coo_sorted:\n");
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	if(mtxAp)
		rsb__do_mtx_free(mtxAp);	/* destroys all of the internals of matrix */
	RSB_CONDITIONAL_FREE(elements_per_block_row);
	return NULL;
}
1816 
struct rsb_mtx_t * rsb__allocate_from_coo_sorted(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop, rsb_coo_idx_t m, rsb_coo_idx_t k, struct rsb_options_t * o, rsb_type_t typecode, rsb_flags_t flags, rsb_err_t *errvalp)
{
	/*!
	 * \ingroup gr_internals
	 * The routine for matrix building from sorted coo elements.
	 * This function requires the coefficients to be sorted accordingly to the inter block ordering policy.
	 *
	 * \param pinfop is the pointer to a rsb_mtx_partitioning_info_t structure with partitioning information.
	 * \return the assembled matrix, or NULL on error (with *errvalp set, if errvalp is not NULL).
	 *
	 * \note : should behave well with the flags: RSB_FLAG_WANT_FIXED_BLOCKING_VBR RSB_FLAG_SORTED_INPUT
	 * \note : this function should be optimized and tested thoroughly.
	 * */
	struct rsb_mtx_t *mtxAp = NULL;
	rsb_blk_idx_t MI = 0;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_nnz_idx_t * elements_per_block_row = NULL;	/* per major dimension; freed locally */
	rsb_nnz_idx_t * blocks_per_block_row = NULL;	/* per major dimension; ownership moves to mtxAp->bpntr */
	size_t element_count = 0;
	rsb_time_t t = RSB_TIME_ZERO;

#if !RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
	if(!pinfop)
	{
		errval = RSB_ERR_BADARGS;
		goto err;
	}
#endif /* RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS */
	if( flags & RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR )
	{
		errval = RSB_ERR_BADARGS;
		goto err;
	}

	rsb__init_struct(mtxAp = rsb__calloc(sizeof(struct rsb_mtx_t)));
	if(!mtxAp)
	{
		errval = RSB_ERR_ENOMEM;
		goto err;
	}

	if((errval = rsb__set_init_flags_and_stuff(mtxAp,o,pinfop,m,k,0,0,0,typecode,flags))!=RSB_ERR_NO_ERROR)
		goto err;

	/* FIXME : elements_per_block_row can be replaced with a single variable */
	elements_per_block_row = rsb__calloc(sizeof(rsb_nnz_idx_t)*(1+mtxAp->Mdim));
	blocks_per_block_row = rsb__calloc(sizeof(rsb_nnz_idx_t)*(1+mtxAp->Mdim));
	if(!elements_per_block_row || !blocks_per_block_row)
	{
		errval = RSB_ERR_ENOMEM;
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}

	t = - rsb_time();
	/* Block counts are accumulated starting at offset 1 so that the very same
	 * array can later serve as bpntr after an in-place prefix sum.
	 * NOTE: the former code incremented blocks_per_block_row itself; then any
	 * error path between accounting and the bpntr handoff would free an
	 * interior pointer (undefined behaviour). Keeping the pointer at the
	 * allocation start and passing blocks_per_block_row+1 fixes that. */
	errval = rsb__do_account_sorted_optimized(mtxAp,IA,JA,m,k,nnz,pinfop,elements_per_block_row,blocks_per_block_row+1);
	if(RSB_SOME_ERROR(errval))
		goto err;

	if(nnz==0)
		blocks_per_block_row[1] = 0;/* handling the degenerate nnz == 0 case (e.g.: unit diag) */

	mtxAp->block_count = 0;
	element_count = 0;
	for(MI=0;MI<mtxAp->Mdim;++MI)
		mtxAp->block_count += blocks_per_block_row[MI+1];
	for(MI=0;MI<mtxAp->Mdim;++MI)
		element_count += elements_per_block_row[MI];

	t += rsb_time();
	mtxAp->sat = t;	/* sorted-accounting time */
	if(RSB_WANT_VERBOSE_MESSAGES)
		RSB_STDERR("matrix creation phase 1 (accounting) : %lf seconds \n", t);

	mtxAp->indptr = rsb__malloc(sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1));
	mtxAp->bindx = rsb__malloc(sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1));

	if(!mtxAp->bindx ){ errval = RSB_ERR_ENOMEM; goto err;}
	if(!mtxAp->indptr){ errval = RSB_ERR_ENOMEM; goto err;}

	mtxAp->indptr[0] = 0;

	/* hand the counts array over as the block row pointer array */
	mtxAp->bpntr = blocks_per_block_row;
	for(MI=0;MI<mtxAp->Mdim;++MI)
		mtxAp->bpntr[MI+1] += mtxAp->bpntr[MI];	/* in this way bpntr[i] has the count of blocks before row i */
	mtxAp->bpntr[0] = 0;
	blocks_per_block_row = NULL;		/* it will be freed with the matrix */

	/* second pass : we have allocated the needed arrays and are ready to fill in data structures */

#if RSB_WANT_BITMAP
	mtxAp->options = o ;
#endif /* RSB_WANT_BITMAP */
	mtxAp->nnz = nnz;
	mtxAp->element_count = element_count;

	if(mtxAp->block_count > nnz)
	{
		errval = RSB_ERR_INTERNAL_ERROR;
		RSB_STDERR("more blocks (%zd) than nonzeros (%zd) ?could be a bug!\n",(size_t)mtxAp->block_count,(size_t)nnz);
		goto err;
	}

	mtxAp->VA = rsb__malloc( RSB_TOTAL_BLOCK_BYTES(mtxAp,o));

	if(RSB_WANT_VERBOSE_MESSAGES)
		RSB_INFO("allocating %zd bytes.\n",(size_t)RSB_TOTAL_BLOCK_BYTES(mtxAp,o) );

	if(!mtxAp->VA)
	{
		errval = RSB_ERR_ENOMEM;
		RSB_STDERR("had problems allocating %zd bytes.\n",(size_t)RSB_TOTAL_BLOCK_BYTES(mtxAp,o));
		goto err;
	}

	t = - rsb_time();

	/* the following code could run parallel with some work */
	errval = rsb__do_insert_sorted_optimized( mtxAp, VA, IA, JA, nnz, pinfop);
	if(RSB_SOME_ERROR(errval))
		goto err;

	t += rsb_time();
	mtxAp->eit = t;	/* element-insertion time */
	if(RSB_WANT_VERBOSE_MESSAGES)
		RSB_STDERR("matrix creation phase 2 (insertion) : %lf seconds \n", mtxAp->eit);

	RSB_CONDITIONAL_FREE(elements_per_block_row);
	return mtxAp;
err:
	RSB_STDERR("rsb__allocate_from_coo_sorted:\n");
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	if(mtxAp)
		rsb__do_mtx_free(mtxAp);	/* destroys all of the internals of matrix */
	/* blocks_per_block_row is NULL once ownership moved into mtxAp->bpntr,
	 * and always points to the allocation start otherwise: free is safe. */
	RSB_CONDITIONAL_FREE(blocks_per_block_row);
	RSB_CONDITIONAL_FREE(elements_per_block_row);
	return NULL;
}
1992 
rsb__do_get_blocking_from_pinfo(const struct rsb_mtx_partitioning_info_t * pinfop,rsb_flags_t flags,rsb_blk_idx_t * mbp,rsb_blk_idx_t * kbp)1993 rsb_err_t rsb__do_get_blocking_from_pinfo(const struct rsb_mtx_partitioning_info_t * pinfop, rsb_flags_t flags, rsb_blk_idx_t *mbp, rsb_blk_idx_t *kbp)
1994 {
1995 	if( ( flags & RSB_FLAG_WANT_BCSS_STORAGE ) || ( flags & RSB_FLAG_WANT_FIXED_BLOCKING_VBR ) )
1996 	{
1997 		if( pinfop && pinfop->cpntr && pinfop->rpntr )
1998 		{
1999 			/* FIXME : experimental */
2000 			*kbp = pinfop->cpntr[1]-pinfop->cpntr[0];
2001 			*mbp = pinfop->rpntr[1]-pinfop->rpntr[0];
2002 		}
2003 		else
2004 		if(pinfop)
2005 		{
2006 #if RSB_EXPERIMENTAL_USE_PURE_BCSS
2007 			*mbp = pinfop->br;
2008 			*kbp = pinfop->bc;
2009 #else /* RSB_EXPERIMENTAL_USE_PURE_BCSS */
2010 			*kbp = *mbp = -1;
2011 #endif /* RSB_EXPERIMENTAL_USE_PURE_BCSS */
2012 		}
2013 		else
2014 		{
2015 			*kbp = *mbp = 1;
2016 		}
2017 		RSB_DEBUG_ASSERT(*kbp>=1);
2018 		RSB_DEBUG_ASSERT(*mbp>=1);
2019 		if( *kbp<1 || *mbp <1 )
2020 		{
2021 			return RSB_ERR_BADARGS;
2022 		}
2023 	}
2024 	return RSB_ERR_NO_ERROR;
2025 }
2026 
size_t rsb__util_strlen(const rsb_char_t *s)
{
	/*!
	 * \ingroup gr_internals
	 * strlen() wrapper, kept to centralize the Flawfinder waiver.
	 */
	const size_t len = strlen(s);/* Flawfinder: ignore */

	return len;
}
2034 
#if 0
/* Disabled: a va_list cannot be forwarded to a "..."-style formatter —
 * rsb__sprintf() would need a vsprintf()-style va_list entry point for
 * this to work; as written the ap argument is formatted as a plain value. */
static int rsb_util_sprintf(rsb_char_t *str, const rsb_char_t *format, ...)
{
	/*!
	 * \ingroup gr_internals
	 * FIXME :  BUGGY
	 * */
        va_list ap;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	va_start(ap,format);
	errval = rsb__sprintf(str,format,ap);/* Flawfinder: ignore */
	va_end(ap);
	RSB_DO_ERR_RETURN(errval)
}
#endif
2050 
static rsb_char_t *rsb_util_strcat(rsb_char_t *dest, const rsb_char_t *src)
{
	/*!
	 * \ingroup gr_internals
	 * strcat() wrapper; dest must have room for the concatenation.
	 */
	rsb_char_t *ret = strcat(dest,src); /* Flawfinder: ignore */

	return ret;
}
2059 
const rsb_char_t * rsb__sprint_matrix_implementation_code2(const struct rsb_mtx_t *mtxAp, rsb_char_t * buf, rsb_flags_t inflags)
{
	/*!
	 * \ingroup gr_internals
	 * Write a tab-separated summary into buf: rows, columns, block row size,
	 * block column size, nonzero count, fillin.
	 * buf must be at least RSB_CONST_MATRIX_IMPLEMENTATION_CODE_STRING_MAX_LENGTH chars long.
	 * FIXME : missing error handling; inflags is currently unused.
	 * \return buf, or NULL when mtxAp is NULL.
	 */
	const rsb_char_t sep[] = "\t";
	rsb_blk_idx_t br,bc;
	size_t off = 0;

	if(mtxAp == NULL)
		return NULL;

	rsb__get_blocking_size(mtxAp,&br,&bc);

	/* NOTE : assumes BCSR or takes into account only the first blocks */
	buf[0] = '\0';
	rsb__sprintf(buf,"%ld%s%ld%s",(long)mtxAp->nr,sep,(long)mtxAp->nc,sep);
	off = rsb__util_strlen(buf);
	rsb__sprintf(buf+off,"%ld%s%ld%s",(long)br,sep,(long)bc,sep);
	off = rsb__util_strlen(buf);
	rsb__sprintf(buf+off,"%zd%s%lg",(size_t)rsb__do_get_matrix_nnz(mtxAp),sep,rsb__do_get_matrix_fillin(mtxAp));

	return buf;
}
2081 
rsb__do_get_symmetry_char(const struct rsb_mtx_t * mtxAp)2082 rsb_char_t rsb__do_get_symmetry_char(const struct rsb_mtx_t *mtxAp)
2083 {
2084 	/*!
2085 	 * \ingroup gr_internals
2086 	 */
2087 	if(rsb__is_symmetric(mtxAp))
2088 		return 'S';
2089 	else
2090 	if(rsb__is_hermitian(mtxAp))
2091 		return 'H';
2092 	else
2093 		return 'G';
2094 }
2095 
static const rsb_char_t * rsb_do_get_symmetry_string(const struct rsb_mtx_t *mtxAp, rsb_char_t * auxbuf)
{
	/*!
	 * \ingroup gr_internals
	 * Copy the matrix symmetry name into auxbuf (caller-provided; must be
	 * large enough for "Symmetric") and return auxbuf.
	 */
	const rsb_char_t * name = "General";

	if(rsb__is_symmetric(mtxAp))
		name = "Symmetric";
	else if(rsb__is_hermitian(mtxAp))
		name = "Hermitian";

	rsb__strcpy(auxbuf,name);
	return auxbuf;
}
2114 
rsb__fprint_matrix_implementation_code(const struct rsb_mtx_t * mtxAp,const rsb_char_t * op,rsb_flags_t inflags,FILE * fd)2115 rsb_err_t rsb__fprint_matrix_implementation_code(const struct rsb_mtx_t *mtxAp, const rsb_char_t * op, rsb_flags_t inflags, FILE*fd)
2116 {
2117 	rsb_err_t errval = RSB_ERR_NO_ERROR;
2118 	rsb_char_t buf[RSB_CONST_MATRIX_IMPLEMENTATION_CODE_STRING_MAX_LENGTH];/* Flawfinder: ignore */
2119 
2120 	fprintf( fd, "%s", rsb__sprint_matrix_implementation_code(mtxAp,op,inflags,buf));
2121 	return errval;
2122 }
2123 
void rsb__cat_compver(rsb_char_t * buf)
{
	/* Write a compiler identification string ("<name>-<version>") at buf.
	 * NOTE(review): every branch sprintf()s AT buf (overwriting) except the
	 * fallback, which strcat()s; callers pass buf+strlen(buf), so both behave
	 * the same there — confirm before adding new call sites.
	 * buf is not bounds-checked; it must have room for the longest id. */
	/* FIXME : fix	rsb_util_sprintf and use it ! */
#if defined(__INTEL_COMPILER)
	/* icc 10.10 is ok */
	rsb__sprintf(buf,"intel-%d",__INTEL_COMPILER);
#elif   defined(__xlC__)
	/* ok on sp5 */
	rsb__sprintf(buf,"xlc-%d",__xlC__);
#elif   defined(__PGI)
	/* pgcc-7.0.4 is ok */
	rsb__sprintf(buf,"pgcc-%d.%d.%d",__PGIC__,__PGIC_MINOR__,__PGIC_PATCHLEVEL__);
#elif   defined(__clang__)
	rsb__sprintf(buf,"clang-%d.%d.%d",__clang_major__,__clang_minor__,__clang_patchlevel__);
	/* see also __clang_version__ */
#elif   defined(__FUJITSU)
	rsb__sprintf(buf,"fujitsu-%d.%d.%d"/*"(%s)"*/,__FCC_major__,__FCC_minor__,__FCC_patchlevel__/*,__FCC_version__*/);
#elif   defined(__GNUC__)
	/* NOTE: checked after icc/clang because those also define __GNUC__ */
	rsb__sprintf(buf,"gcc-%d.%d",__GNUC__,__GNUC_MINOR__);
#elif defined(__SUNPRO_CC)
	rsb__sprintf(buf,"sun-%d",__SUNPRO_CC);
#else /* __SUNPRO_CC */
	rsb_util_strcat(buf,"CC?");	/* unknown compiler */
#endif /* __SUNPRO_CC */
}
2149 
const rsb_char_t * rsb__sprint_matrix_implementation_code(const struct rsb_mtx_t *mtxAp, const rsb_char_t * op, rsb_flags_t inflags, rsb_char_t * buf)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * Gives back a matrix implementation info string: "/"-separated fields
	 * describing format, ordering, symmetry, operation, flags, numeric type,
	 * core count, compiler, build flags, index sizes/storage, library
	 * version, memory hierarchy info and host name.
	 * NOTE: for more consistency, we should translate any \t (TAB) char in ' '.
	 * NOTE: it will give some more info, too..
	 * NOTE(review): buf is not bounds-checked here; callers size it with
	 * RSB_CONST_MATRIX_IMPLEMENTATION_CODE_STRING_MAX_LENGTH — confirm that is enough.
	 *
	 * \return buf on correct operation, NULL otherwise
	 * */

	rsb_char_t sep[] = "/";
	const rsb_char_t * csp;
	rsb_long_t sm = 0;	/* total submatrices */
	rsb_long_t tsm = 0;	/* terminal (leaf) submatrices */
	rsb_flags_t flags = RSB_FLAG_NOFLAGS;
	rsb_blk_idx_t br = 0,bc = 0;
	rsb_char_t auxbuf[RSB_TOKLEN];

	if(!mtxAp)
		return NULL;

	rsb__get_blocking_size(mtxAp,&br,&bc);

	flags = mtxAp->flags|inflags;

	sm = rsb__submatrices(mtxAp);
	tsm = rsb__terminal_recursive_matrix_count(mtxAp);

	buf[0] = '\0';

	/* FIXME : DANGER */

	/* format name: the if(1) keeps the per-storage branches below disabled,
	 * so the emitted format label is always "RSB" */
	if(1)
	{
		long hcoo = rsb__terminal_recursive_matrix_count_with_storage_and_flags(mtxAp,RSB_MATRIX_STORAGE_BCOR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
		long hcsr = rsb__terminal_recursive_matrix_count_with_storage_and_flags(mtxAp,RSB_MATRIX_STORAGE_BCSR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
		long fcoo = rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(mtxAp,RSB_MATRIX_STORAGE_BCOR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
		long fcsr = rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(mtxAp,RSB_MATRIX_STORAGE_BCSR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
		long kinds = 0;
		if(hcoo)++kinds;
		if(hcsr)++kinds;
		if(fcoo)++kinds;
		if(fcsr)++kinds;
#if 0
		if(fcoo==0 && hcoo==0)
			rsb_util_strcat(buf,"CSR");
		else
		if(fcsr==0 && hcsr==0)
			rsb_util_strcat(buf,"COO");
		else
#endif
			rsb_util_strcat(buf,"RSB");
	}
	else
	{
	/* dead branch: per-storage format labels, kept for reference */
	if(rsb__is_recursive_matrix(flags))
		rsb_util_strcat(buf,"R");

#ifdef RSB_MATRIX_STORAGE_BCOR
	if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_BCOR)
	{
		if(br==1&&bc==1)
			rsb_util_strcat(buf,"COR");
		else
			rsb_util_strcat(buf,RSB_MATRIX_STORAGE_BCOR_STRING);
	}
	else
#endif /* RSB_MATRIX_STORAGE_BCOR */
#ifdef RSB_MATRIX_STORAGE_BCOC
	if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_BCOC)
	{
		if(br==1&&bc==1)
			rsb_util_strcat(buf,"COC");
		else
			rsb_util_strcat(buf,RSB_MATRIX_STORAGE_BCOC_STRING);
	}
	else
#endif /* RSB_MATRIX_STORAGE_BCOC */
#ifdef RSB_MATRIX_STORAGE_BCSR
	if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_BCSR)
	{
		if(br==1&&bc==1)
			rsb_util_strcat(buf,"CSR");
		else
			rsb_util_strcat(buf,RSB_MATRIX_STORAGE_BCSR_STRING);
	}
	else
#endif /* RSB_MATRIX_STORAGE_BCSR */
#ifdef RSB_MATRIX_STORAGE_BCSC
	if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_BCSC)
	{
		if(br==1&&bc==1)
			rsb_util_strcat(buf,"CSC");
		else
			rsb_util_strcat(buf,RSB_MATRIX_STORAGE_BCSC_STRING);
	}
	else
#endif /* RSB_MATRIX_STORAGE_BCSC */
#ifdef RSB_MATRIX_STORAGE_VBR
	if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_VBR )
		rsb_util_strcat(buf,RSB_MATRIX_STORAGE_VBR_STRING);
	else
#endif /* RSB_MATRIX_STORAGE_VBR */
#ifdef RSB_MATRIX_STORAGE_VBC
	if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_VBC )
		rsb_util_strcat(buf,RSB_MATRIX_STORAGE_VBC_STRING);
	else
#endif /* RSB_MATRIX_STORAGE_VBC */
#ifdef RSB_MATRIX_STORAGE_LR
	if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_LR )
		rsb_util_strcat(buf,"LBLR");
	else
#endif /* RSB_MATRIX_STORAGE_LR */
#ifdef RSB_MATRIX_STORAGE_LC
	if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_LC )
		rsb_util_strcat(buf,"LBLC");
	else
#endif /* RSB_MATRIX_STORAGE_LC */
		return NULL;
	}
	{
	/* recursion statistics: submatrix counts and diagonal concentration */
//	if(sm>=1 && rsb__is_recursive_matrix(mtxAp))/* NEW */ /* FIXME : rsb__is_recursive_matrix() seems plagued by indeterminism! */
		if(sm>=1 /*&& rsb__is_recursive_matrix(mtxAp)*/)/* NEW */
			rsb__sprintf(buf+rsb__util_strlen(buf),"(@:%ld/%ld;%3.1lf%%diagnz;%3.1lf%%diagblk)",sm,tsm,
					(((double)rsb__get_diagonal_elements_count(mtxAp)*100)/(mtxAp->nnz)),
					(((double)rsb__get_diagonal_submatrices_count(mtxAp)*100)/(tsm))
					);
	}
#if 1
	/* uhm. this refers to inter block ordering. */
	if( mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER )
		rsb_util_strcat(buf,"-C");
	else
		rsb_util_strcat(buf,"-R");
#endif
	rsb_util_strcat(buf,sep);
	rsb_util_strcat(buf,"RowMajor");
	rsb_util_strcat(buf,sep);
	rsb_util_strcat(buf,rsb_do_get_symmetry_string(mtxAp,auxbuf));
	rsb_util_strcat(buf,sep);
	rsb_util_strcat(buf,op?op:"");
	/* flag-dependent feature markers */
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_AUTO_BLOCKING))
		rsb_util_strcat(buf,"-AutoBlocking");
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR))
		rsb_util_strcat(buf,"-InPlace");
#if 0
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_USE_HALFWORD_INDICES_COO))
		rsb__sprintf(buf+rsb__util_strlen(buf),"-SwitchToHalfwordCoo:(%ld~%ld)"
		,rsb__terminal_recursive_matrix_count_with_flags(mtxAp,RSB_FLAG_USE_HALFWORD_INDICES_COO)
		,tsm);
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_USE_HALFWORD_INDICES_CSR))
		rsb__sprintf(buf+rsb__util_strlen(buf),"-SwitchToHalfwordCsr:(%ld~%ld)"
		,rsb__terminal_recursive_matrix_count_with_flags_but(mtxAp,RSB_FLAG_USE_HALFWORD_INDICES_CSR,RSB_FLAG_USE_HALFWORD_INDICES_COO)
		,tsm);
#else
	{
		/* leaf counts per storage/index-width combination (out of tsm leaves) */
		long hcoo = rsb__terminal_recursive_matrix_count_with_storage_and_flags(mtxAp,RSB_MATRIX_STORAGE_BCOR,RSB_FLAG_USE_HALFWORD_INDICES_COO);
		long hcsr = rsb__terminal_recursive_matrix_count_with_storage_and_flags(mtxAp,RSB_MATRIX_STORAGE_BCSR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
		long fcoo = rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(mtxAp,RSB_MATRIX_STORAGE_BCOR,RSB_FLAG_USE_HALFWORD_INDICES_COO);
		long fcsr = rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(mtxAp,RSB_MATRIX_STORAGE_BCSR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
		rsb__sprintf(buf+rsb__util_strlen(buf),"-HalfwordCsr:(%ld~%ld)",hcsr,tsm);
		rsb__sprintf(buf+rsb__util_strlen(buf),"-FullwordCsr:(%ld~%ld)",fcsr,tsm);
		rsb__sprintf(buf+rsb__util_strlen(buf),"-HalfwordCoo:(%ld~%ld)",hcoo,tsm);
		rsb__sprintf(buf+rsb__util_strlen(buf),"-FullwordCoo:(%ld~%ld)",fcoo,tsm);
	}
#endif
#if 0
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_RECURSIVE_HALF_DETECTED_CACHE))
		rsb_util_strcat(buf,"-BlockForHalfCache");
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_RECURSIVE_DOUBLE_DETECTED_CACHE))
		rsb_util_strcat(buf,"-BlockForDoubleCache");
#endif
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_RECURSIVE_SUBDIVIDE_MORE_ON_DIAG))
		rsb_util_strcat(buf,"-ExtraDiagonalSubdivisions");
#ifdef RSB_FLAG_EXPERIMENTAL_NO_MICRO_LEAVES
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXPERIMENTAL_NO_MICRO_LEAVES))
		rsb_util_strcat(buf,"-NoMicroLeafs");
#endif /* RSB_FLAG_EXPERIMENTAL_NO_MICRO_LEAVES */

	/* numeric type code */
	rsb_util_strcat(buf,sep);
	RSB_NUMERICAL_TYPE_STRING(csp,mtxAp->typecode);
	rsb_util_strcat(buf,csp);

	/* OpenMP thread count (1 when built without OpenMP) */
	if(1)
	{
		rsb_thread_t ncores = 0;
#if RSB_WANT_OMP_RECURSIVE_KERNELS
                #pragma omp parallel RSB_NTC
                if(omp_get_thread_num()==0)
                {
                        ncores = omp_get_num_threads();
                }
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
		ncores = ncores?ncores:1;
		rsb_util_strcat(buf,sep);
		rsb__sprintf(buf+rsb__util_strlen(buf),"cores:%d",ncores);
	}
	/* see http://gasnet.cs.berkeley.edu/dist/other/portable_platform.h */
	rsb_util_strcat(buf,sep);
	/* NOTE : on some systems, __GNUC__ is defined even under icc! (therefore we switched precedence) */

#if 0
	/* FIXME : fix	rsb_util_sprintf and use it ! */
#if defined(__INTEL_COMPILER)
	/* icc 10.10 is ok */
	rsb__sprintf(buf+rsb__util_strlen(buf),"intel-%d",__INTEL_COMPILER);
#elif   defined(__xlC__)
	/* ok on sp5 */
	rsb__sprintf(buf+rsb__util_strlen(buf),"xlc-%d",__xlC__);
#elif   defined(__PGI)
	/* pgcc-7.0.4 is ok */
	rsb__sprintf(buf+rsb__util_strlen(buf),"pgcc-%d.%d.%d",__PGIC__,__PGIC_MINOR__,__PGIC_PATCHLEVEL__);
#elif   defined(__GNUC__)
	rsb__sprintf(buf+rsb__util_strlen(buf),"gcc-%d.%d",__GNUC__,__GNUC_MINOR__);
#elif defined(__SUNPRO_CC)
	rsb__sprintf(buf+rsb__util_strlen(buf),"sun-%d",__SUNPRO_CC);
#else /* __SUNPRO_CC */
	rsb_util_strcat(buf,"CC?");
#endif /* __SUNPRO_CC */
#else
	/* compiler identification */
	rsb__cat_compver(buf+rsb__util_strlen(buf));
#endif
	rsb_util_strcat(buf,sep);
	/* build-time compilation flags */
	/* still missing CXX case */
#if   defined(CFLAGS)
	//rsb_util_sprintf(buf+rsb__util_strlen(buf),"%s",CFLAGS);
	rsb__sprintf(buf+rsb__util_strlen(buf),"%s",CFLAGS);
#else /* CFLAGS */
	rsb_util_strcat(buf,"");
#endif /* CFLAGS */
	/* NEW: index type sizes */
	rsb_util_strcat(buf,sep);
	rsb__sprintf(buf+rsb__util_strlen(buf),"sizeof(nnz_idx_t):%zd,",sizeof(rsb_nnz_idx_t));
	rsb__sprintf(buf+rsb__util_strlen(buf),"sizeof(coo_idx_t):%zd,",sizeof(rsb_coo_idx_t));
	rsb__sprintf(buf+rsb__util_strlen(buf),"sizeof(blk_idx_t):%zd",sizeof(rsb_blk_idx_t));

	/* NEW: actual index storage vs. what plain CSR/COO would need */
	rsb_util_strcat(buf,sep);
	rsb__sprintf(buf+rsb__util_strlen(buf),"idx_storage:%zd-idx_storage_in_csr:%zd-idx_storage_in_coo:%zd"
		,(size_t)rsb__get_index_storage_amount(mtxAp)
		,((size_t)mtxAp->nnz)*sizeof(rsb_coo_idx_t)+((size_t)mtxAp->Mdim+1)*sizeof(rsb_nnz_idx_t)
		,((size_t)mtxAp->nnz)*sizeof(rsb_coo_idx_t)*2
		);

	rsb_util_strcat(buf,sep);
#ifdef RSB_PACKAGE_VERSION
	rsb__sprintf(buf+rsb__util_strlen(buf),"version:%s",RSB_PACKAGE_VERSION);
#endif /* RSB_PACKAGE_VERSION */
	rsb_util_strcat(buf,sep);
	rsb_util_strcat(buf,"memhinfo:[");
	{rsb_char_t usmhib[RSB_MAX_LINE_LENGTH];
	rsb_util_strcat(buf,rsb__get_mem_hierarchy_info_string(usmhib));}
	rsb_util_strcat(buf,"]");
	rsb_util_strcat(buf,sep);
#ifdef RSB_HAVE_SYS_UTSNAME_H
	{
		/* host name via uname(2); silently omitted on failure */
		struct utsname un;
		if(uname(&un)==0)
			rsb__sprintf(buf+rsb__util_strlen(buf),"%s",un.nodename);
#if 0
           struct utsname {
               char sysname[];
               char nodename[];
               char release[];
               char version[];
               char machine[];
           #ifdef _GNU_SOURCE
               char domainname[];
           #endif /* _GNU_SOURCE */
           };
#endif
	}
#else /* RSB_HAVE_SYS_UTSNAME_H */
rsb_util_strcat(buf,"");
#endif /* RSB_HAVE_SYS_UTSNAME_H */

	return buf;
}
2430 
rsb__util_get_bx_array(const rsb_char_t * optarg,int * bxlp,rsb_blk_idx_t ** bxvp)2431 rsb_err_t rsb__util_get_bx_array(const rsb_char_t* optarg, int* bxlp, rsb_blk_idx_t **bxvp)
2432 {
2433 	/*!
2434 	   	\ingroup gr_internals
2435 
2436 		Will extract block row and block column sizes from user data codified in optarg.
2437 		\param bxlp will be set to the number of the desired block sizes.
2438 		\param bxvp will be set to an array (of dimension *bxlp) allocated with rsb__malloc() with block sizes.
2439 
2440 	        \note : there are subtle dangers in this function
2441 	        \note : if *bxvp is not NULL, it will be freed
2442 	        \todo : move to some file named parse.c
2443 	 */
2444 	int bxl = 0;
2445 	rsb_blk_idx_t * bxv = NULL;
2446 	const rsb_char_t*c = optarg;
2447 	int mint = 1,maxt = 1;
2448 
2449 	if(!bxlp || !bxvp)
2450 		return RSB_ERR_BADARGS;
2451 
2452 	if(*optarg==':')
2453 	{
2454 #if RSB_WANT_OMP_RECURSIVE_KERNELS
2455 		maxt = omp_get_max_threads();
2456 #endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
2457 		while(mint<=maxt) mint *= 2,++bxl;
2458 		if( mint > maxt && mint/2 != maxt ) bxl++;
2459 		mint = 1;
2460 	}
2461 	else
2462 	do
2463 	{
2464 		int nul = 0;
2465 		while(*c!=nul && !isdigit(*c))++c;
2466 		if(isdigit(*c))bxl++;
2467 		while(*c &&  isdigit(*c))++c;
2468 	}while(*c);
2469 
2470 	bxv = *bxvp;
2471 	if(bxv)rsb__free(bxv);
2472 	bxv = rsb__malloc(sizeof(rsb_blk_idx_t)*(size_t)bxl);
2473 	if(!bxv)goto err;
2474 	bxl = 0;
2475 	c = optarg;
2476 
2477 	if(*optarg==':')
2478 	{
2479 		while( mint <= maxt )
2480 			bxv[bxl++] = mint, mint *= 2;
2481 		if( bxv[bxl-1] != maxt )
2482 			bxv[bxl++] = maxt;
2483 	}
2484 	else
2485 	do
2486 	{
2487 		int nul = 0,ci;
2488 		while(*c!=nul && !isdigit(*c))++c;
2489 		{
2490 			ci = rsb__util_atoi(c);/* Flawfinder: ignore */
2491 			if(ci<1)goto err;
2492 			if(isdigit(*c))bxv[bxl++] = (rsb_blk_idx_t)ci;
2493 		}
2494 		while(*c &&  isdigit(*c))++c;
2495 	}while(*c);
2496 
2497 	*bxlp = bxl;
2498 	*bxvp = bxv;
2499 
2500 	return RSB_ERR_NO_ERROR;
2501 err:
2502 	RSB_CONDITIONAL_FREE(bxv);
2503 	rsb__do_perror(NULL,RSB_ERR_GENERIC_ERROR);
2504 	return RSB_ERR_GENERIC_ERROR;
2505 }
2506 
#if 0
/* Disabled: atol()-based, so range errors are undetectable; negatives clamp to 0. */
rsb_nnz_idx_t rsb__util_atonnz(const rsb_char_t * optarg)
{
	/*!
		\ingroup gr_internals

	   	Will parse a single rsb_nnz_idx_t number.
	 	\param optarg

		\note : may overflow
		\warning : may overflow
	 */
	rsb_long_t i = rsb__util_atol(optarg);/*Flawfinder: ignore */
	if(i<0)
		i = 0;
	return (rsb_nnz_idx_t)i;
}
#endif
2525 
rsb__util_atol(const rsb_char_t * nptr)2526 rsb_long_t rsb__util_atol(const rsb_char_t *nptr)
2527 {
2528 	/*!
2529 	  	\ingroup gr_internals
2530 	 */
2531 	return atol(nptr);/* Flawfinder: ignore */
2532 }
2533 
rsb__util_atof(const rsb_char_t * nptr)2534 rsb_real_t rsb__util_atof(const rsb_char_t *nptr)
2535 {
2536 	/*!
2537 	  	\ingroup gr_internals
2538 	 */
2539 	return atof(nptr);/* Flawfinder: ignore */
2540 }
2541 
int rsb__util_atoi(const rsb_char_t *nptr)
{
	/*!
	 * \ingroup gr_internals
	 * NULL-tolerant atoi(): a NULL argument yields 0.
	 */
	if(nptr == NULL)
		return 0;

	return atoi(nptr);/* Flawfinder: ignore */
}
2554 
static int rsb__util_atoi_kmX(const rsb_char_t *nptr, int base)
{
	/*!
	 * \ingroup gr_internals
	 * Parse an integer with an optional K/M/G suffix (case-insensitive),
	 * scaling by base, base^2 or base^3 respectively (base is 1024 or 1000).
	 * NULL yields 0. No overflow detection (int arithmetic).
	 */
	int v = rsb__util_atoi(nptr);

	if(!nptr)
		goto ret;
	/* skip exactly what atoi() consumed — optional leading whitespace, one
	 * optional sign, then digits — so a suffix after e.g. "-2k" is found too;
	 * ctype calls take unsigned char to avoid UB on negative char values */
	while(isspace((unsigned char)*nptr))
		++nptr;
	if(*nptr=='+' || *nptr=='-')
		++nptr;
	while(isdigit((unsigned char)*nptr))
		++nptr;
	if(*nptr && tolower((unsigned char)*nptr)=='g')
		v *= base * base * base;
	if(*nptr && tolower((unsigned char)*nptr)=='m')
		v *= base * base;
	if(*nptr && tolower((unsigned char)*nptr)=='k')
		v *= base;
ret:
	return v;
}
2575 
int rsb__util_atoi_km2(const rsb_char_t *nptr)
{
	/*! \ingroup gr_internals
	 * Parse an integer with binary (base-1024) K/M/G suffixes. */
	const int binary_base = 1024;

	return rsb__util_atoi_kmX(nptr, binary_base);
}
2580 
int rsb__util_atoi_km10(const rsb_char_t *nptr)
{
	/*! \ingroup gr_internals
	 * Parse an integer with decimal (base-1000) K/M/G suffixes. */
	const int decimal_base = 1000;

	return rsb__util_atoi_kmX(nptr, decimal_base);
}
2585 
rsb_err_t rsb__copy_css_arrays(const void *iVA, const rsb_coo_idx_t * iINDX, const rsb_coo_idx_t * iXA, const rsb_nnz_idx_t nnz, rsb_coo_idx_t X, rsb_type_t typecode, void *oVA, rsb_coo_idx_t * oINDX, rsb_nnz_idx_t * oXA)
{
	/*!
	 * \ingroup gr_internals
	 * Deep-copy a compressed sparse (CSR/CSC-style) array triple
	 * (values, indices, pointers) from the i* arrays to the o* arrays
	 * via RSB_CSR_MEMCPY; values are sized according to typecode.
	 * NOTE(review): X is presumably the compressed-dimension extent (rows for
	 * CSR, columns for CSC) — confirm against RSB_CSR_MEMCPY's definition.
	 * \return RSB_ERR_BADARGS on NULL pointers or invalid X/nnz, else RSB_ERR_NO_ERROR.
	 */
	if(!iVA || !iINDX || !iXA || RSB_INVALID_COO_INDEX(X) || RSB_INVALID_NNZ_INDEX(nnz) || !oVA || !oINDX || !oXA)
		return RSB_ERR_BADARGS;
	RSB_CSR_MEMCPY(oVA,oINDX,oXA,iVA,iINDX,iXA,nnz,X,RSB_SIZEOF(typecode));
	return RSB_ERR_NO_ERROR;
}
2593 
rsb_err_t rsb__allocate_csc_arrays_from_coo_sorted(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_type_t typecode, void **VAp, rsb_coo_idx_t ** indxp, rsb_nnz_idx_t ** indptrp)
{
	/*!
	 * \ingroup gr_internals
	 * Build CSC arrays by delegating to the CSR routine with IA/JA and m/k
	 * swapped (CSC of A equals CSR of A transposed); the input must be
	 * sorted accordingly (column-major).
	 */
	return rsb__allocate_csr_arrays_from_coo_sorted(VA, JA, IA, nnz, k, m, typecode, VAp, indxp, indptrp);
}
2598 
rsb__allocate_csr_arrays_from_coo_sorted(const void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,rsb_coo_idx_t m,rsb_coo_idx_t k,rsb_type_t typecode,void ** VAp,rsb_coo_idx_t ** indxp,rsb_nnz_idx_t ** indptrp)2599 rsb_err_t rsb__allocate_csr_arrays_from_coo_sorted(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_type_t typecode, void **VAp, rsb_coo_idx_t ** indxp, rsb_nnz_idx_t ** indptrp)
2600 {
2601 	/*!
2602 	 	\ingroup gr_internals
2603 
2604 		FIXME : UNFINISHED, UNTESTED, NEW, and SLOW
2605 	*/
2606 	rsb_err_t errval = RSB_ERR_NO_ERROR;
2607 	rsb_nnz_idx_t * indx = NULL,MI = 0;
2608 	rsb_coo_idx_t * indpntr = NULL;
2609 	void *cVA = NULL;
2610 
2611 	if(!indxp || !indptrp || !JA || !IA) /* VA is optional */
2612 	{
2613 		errval = RSB_ERR_BADARGS;
2614 		goto err;
2615 	}
2616 
2617 	indx = rsb__clone_area(JA,sizeof(rsb_coo_idx_t)*(nnz));	/* FIXME ! BAD ! */
2618 	indpntr = rsb__calloc(sizeof(rsb_nnz_idx_t)*(m+1));
2619 
2620 	if(!indx || !indpntr)
2621 	{
2622 		errval = RSB_ERR_ENOMEM;
2623 		goto err;
2624 	}
2625 
2626 	if(VA)
2627 	{
2628 		cVA = rsb__clone_area(VA,RSB_SIZEOF(typecode)*(nnz));
2629 		if(!cVA)
2630 		{
2631 			errval = RSB_ERR_ENOMEM;
2632 			goto err;
2633 		}
2634 	}
2635 
2636 	errval = rsb__do_account_sorted_optimized_css(IA,JA,m,k,nnz,indpntr+1,NULL);
2637 
2638 	if(RSB_SOME_ERROR(errval))
2639 		goto err;
2640 
2641 	for(MI=0;MI<m;++MI)
2642 		indpntr[MI+1] += indpntr[MI];
2643 
2644 	if(VAp)*VAp = cVA;
2645 	*indxp = indx;
2646 	*indptrp = indpntr;
2647 
2648 	goto ok;
2649 err:
2650 	RSB_CONDITIONAL_FREE(cVA);
2651 	RSB_CONDITIONAL_FREE(indx);
2652 	RSB_CONDITIONAL_FREE(indpntr);
2653 ok:
2654 	RSB_DO_ERR_RETURN(errval)
2655 }
2656 
rsb__print_configuration_string(const char * pn,rsb_char_t * cs,rsb_bool_t wci)2657 rsb_err_t rsb__print_configuration_string(const char *pn, rsb_char_t * cs, rsb_bool_t wci)
2658 {
2659 	/*!
2660 	 	\ingroup gr_internals
2661 	*/
2662 	/*
2663  		note : an interleaved
2664 			RSB_INFO(
2665 			#ifdef FOO
2666 			"string"
2667 			# endif
2668 			)
2669 			style of string composition breaks xlc, so we don't use it, in the following.
2670 		FIXME: pn should be reasonably short, as this routine does NOT check for buffer overflow. for this same reason this function shall remain INTERNAL always.
2671  	*/
2672 	rsb_char_t buf[RSB_MAX_VERSION_STRING_LENGTH];
2673 	const rsb_char_t * sep = " ";
2674 	const rsb_char_t * nl = "\n";
2675 
2676 	rsb__strcpy(buf,"");
2677 	if(!cs)
2678 	{
2679 		// TODO: missing error handling
2680 	       	goto done;
2681 	}
2682 #if 0
2683 	rsb__sprintf(buf,"%s version: %d.%d.%d\n",pn?pn:"librsb",RSB_LIBRSB_VER_MAJOR,RSB_LIBRSB_VER_MINOR,RSB_LIBRSB_VER_RELEASE);
2684 #else
2685 	rsb__sprintf(buf,"%s version: %s\n",pn?pn:"librsb",RSB_LIBRSB_VER_STRING);
2686 #endif
2687 	if(wci == RSB_BOOL_FALSE)
2688 	{
2689 		rsb__sprintf(cs,"%s",buf);
2690             	rsb__sprintf(cs+strlen(cs),"%s.\n\n",RSB_COPYRIGHT_STRING);
2691             	rsb__sprintf(cs+strlen(cs),"Written by %s.\n",RSB_PACKAGE_BUGREPORT);
2692 		goto done;
2693 	}
2694 	rsb_util_strcat(buf,"format switches:");
2695 #ifdef RSB_MATRIX_STORAGE_BCSR_STRING
2696 	rsb_util_strcat(buf,"br");
2697 	rsb_util_strcat(buf,sep);
2698 #endif /* RSB_MATRIX_STORAGE_BCSR_STRING */
2699 #ifdef RSB_MATRIX_STORAGE_BCSC_STRING
2700 	rsb_util_strcat(buf,"bc");
2701 	rsb_util_strcat(buf,sep);
2702 #endif /* RSB_MATRIX_STORAGE_BCSC_STRING */
2703 #ifdef RSB_MATRIX_STORAGE_VBR_STRING
2704 	rsb_util_strcat(buf,"vr");
2705 	rsb_util_strcat(buf,sep);
2706 #endif /* RSB_MATRIX_STORAGE_VBR_STRING */
2707 #ifdef RSB_MATRIX_STORAGE_VBC_STRING
2708 	rsb_util_strcat(buf,"vc");
2709 	rsb_util_strcat(buf,sep);
2710 #endif /* RSB_MATRIX_STORAGE_VBC_STRING */
2711 #ifdef RSB_MATRIX_STORAGE_LC_STRING
2712 	rsb_util_strcat(buf,"lc");
2713 	rsb_util_strcat(buf,sep);
2714 #endif /* RSB_MATRIX_STORAGE_LC_STRING */
2715 #ifdef RSB_MATRIX_STORAGE_LR_STRING
2716 	rsb_util_strcat(buf,"lr");
2717 	rsb_util_strcat(buf,sep);
2718 #endif /* RSB_MATRIX_STORAGE_LR_STRING */
2719 	rsb_util_strcat(buf,nl);
2720 	rsb_util_strcat(buf,"ops:");
2721 	rsb_util_strcat(buf,RSB_M4_MATRIX_META_OPS_STRING);
2722 	rsb_util_strcat(buf,nl);
2723 
2724 	rsb_util_strcat(buf,"types:");
2725 	rsb_util_strcat(buf,RSB_M4_MATRIX_TYPES_STRING);
2726 	rsb_util_strcat(buf,nl);
2727 	rsb_util_strcat(buf,"type char codes:");
2728 	rsb_util_strcat(buf,RSB_NUMERICAL_TYPE_PREPROCESSOR_SYMBOLS );
2729 	rsb_util_strcat(buf,nl);
2730 	rsb_util_strcat(buf,"transposition codes:");
2731 	rsb_util_strcat(buf,RSB_TRANSPOSITIONS_PREPROCESSOR_SYMBOLS );
2732 	rsb_util_strcat(buf,nl);
2733 
2734 	rsb_util_strcat(buf,"restrict keyword is: ");
2735 #ifdef RSB_restrict
2736 	rsb_util_strcat(buf,"on" );
2737 #else /* RSB_restrict */
2738 	rsb_util_strcat(buf,"off" );
2739 #endif /* RSB_restrict */
2740 	rsb_util_strcat(buf,nl);
2741 
2742 	rsb_util_strcat(buf,"row unrolls:");
2743 	rsb_util_strcat(buf,RSB_M4_WANT_COLUMN_UNLOOP_FACTORS_STRING);
2744 	rsb_util_strcat(buf,nl);
2745 	rsb_util_strcat(buf,"column unrolls:");
2746 	rsb_util_strcat(buf,RSB_M4_WANT_ROW_UNLOOP_FACTORS_STRING	);
2747 	rsb_util_strcat(buf,nl);
2748 	rsb_util_strcat(buf,"reference benchmark sample minimum time (seconds):%lg\n");
2749 	rsb_util_strcat(buf,"reference benchmark sample minimum runs:%zd\n");
2750 	rsb_util_strcat(buf,"maximal configured block size:%zd\n");
2751 #ifdef RSB_WANT_OSKI_BENCHMARKING
2752 	rsb_util_strcat(buf,"oski comparative benchmarking enabled\n");
2753 #endif /* RSB_WANT_OSKI_BENCHMARKING */
2754 	rsb_util_strcat(buf,"sizeof(rsb_nnz_idx_t):%zd\n");
2755 	rsb_util_strcat(buf,"sizeof(rsb_coo_idx_t):%zd\n");
2756 	rsb_util_strcat(buf,"sizeof(rsb_blk_idx_t):%zd\n");
2757 	rsb_util_strcat(buf,"sizeof(size_t):%zd\n");
2758 	rsb_util_strcat(buf,"sizeof(struct rsb_mtx_t):%zd\n");
2759 	rsb_util_strcat(buf,"sizeof(struct rsb_blas_sparse_matrix_t):%zd\n");
2760 	rsb_util_strcat(buf,"sizeof(struct rsb_coo_matrix_t):%zd\n");
2761 	rsb_util_strcat(buf,"RSB_MAX_MATRIX_DIM:%zd\n");
2762 	rsb_util_strcat(buf,"RSB_MAX_MATRIX_NNZ:%zd\n");
2763 	rsb_util_strcat(buf,"RSB_CONST_MAX_SUPPORTED_CORES:%zd\n");
2764 	rsb_util_strcat(buf,"RSB_BLAS_MATRICES_MAX:%zd\n");
2765 	rsb_util_strcat(buf,"RSB_CONST_MIN_NNZ_PER_ROW_FOR_COO_SWITCH:%zd\n");
2766 
2767 	rsb_util_strcat(buf,"RSB_USER_SET_MEM_HIERARCHY_INFO:%s\n");
2768 	rsb_util_strcat(buf,"RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t):%zd\n");
2769 	rsb_util_strcat(buf,"RSB_IOLEVEL:%d\n");
2770 	//RSB_INFO(
2771 	rsb__sprintf(cs,
2772 		buf,
2773 		RSB_BENCHMARK_MIN_SECONDS,
2774 		(rsb_printf_int_t)RSB_BENCHMARK_MIN_RUNS,
2775 		(rsb_printf_int_t)RSB_MAXIMAL_CONFIGURED_BLOCK_SIZE,
2776 		sizeof(rsb_nnz_idx_t),
2777 		sizeof(rsb_coo_idx_t),
2778 		sizeof(rsb_blk_idx_t),
2779 		sizeof(size_t),
2780 		sizeof(struct rsb_mtx_t),
2781 		sizeof(struct rsb_blas_sparse_matrix_t),
2782 		sizeof(struct rsb_coo_matrix_t),
2783 		(rsb_printf_int_t)RSB_MAX_MATRIX_DIM,
2784 		(rsb_printf_int_t)RSB_MAX_MATRIX_NNZ,
2785 		(rsb_printf_int_t)RSB_CONST_MAX_SUPPORTED_CORES,
2786 		(rsb_printf_int_t)RSB_BLAS_MATRICES_MAX,
2787 		(rsb_printf_int_t)RSB_CONST_MIN_NNZ_PER_ROW_FOR_COO_SWITCH
2788 		,(rsb_printf_int_t)rsb__init_get_mem_hierarchy_info_string(RSB_BOOL_FALSE)?rsb__init_get_mem_hierarchy_info_string(RSB_BOOL_FALSE):NULL
2789 		,RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t)
2790 		,RSB_IOLEVEL
2791 	);
2792 done:
2793 	return RSB_ERR_NO_ERROR;
2794 }
2795 
rsb_blk_idx_t rsb__recursive_middle_block_index(rsb_blk_idx_t i)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * Given a block count i, returns the index at which to split the
	 * range for recursive subdivision.
	 *
	 * \return the index of the split point index.
	 */
#if RSB_EXPERIMENTAL_MORTON_ORDERED_RECURSION
	/* Morton variant: largest power of two whose double is still below i.
	 * Note: 1<<s+1 parses as 1<<(s+1), since '+' binds tighter than '<<'. */
	rsb_blk_idx_t s = 0;
	while( (1<<s+1) < i)
		++s;
	return (1<<s);
#else /* RSB_EXPERIMENTAL_MORTON_ORDERED_RECURSION */
#if 1
	/* default: halve, rounding upwards (e.g. 5 -> 3, 4 -> 2) */
	return (i+1)/2;
#else
	int p = 0;
	while(i>>(p+1) && i > (1<<(p+1)))
		++p;
//	RSB_INFO("%d %d %d\n",i,p,(1<<p));
	return (1<<p);
#endif
#endif /* RSB_EXPERIMENTAL_MORTON_ORDERED_RECURSION */
}
2820 
rsb_err_t rsb__recursive_middle_index(const struct rsb_mtx_partitioning_info_t * pinfop, rsb_coo_idx_t * M_bp, rsb_coo_idx_t * K_bp )
{
	/*!
	 * \ingroup gr_internals
	 *
	 * Computes row/column coordinates of the recursion split point and
	 * stores them in *M_bp and *K_bp.
	 *
	 * \return RSB_ERR_BADARGS on NULL arguments, RSB_ERR_NO_ERROR otherwise.
	 */
	rsb_blk_idx_t mBi;
	rsb_blk_idx_t	kBi;
	if(!pinfop || !M_bp || !K_bp)
		return RSB_ERR_BADARGS;
	/* middle block index along each dimension */
	mBi = rsb__recursive_middle_block_index(pinfop->M_b);
	kBi = rsb__recursive_middle_block_index(pinfop->K_b);
	/* NOTE(review): when rpntr/cpntr are present, the middle function is
	 * applied a second time to mBi/kBi before indexing, whereas
	 * rsb__recursive_split_point_parms_get indexes rpntr[middle(M_b)]
	 * directly — confirm the double application here is intended. */
	if(pinfop->rpntr)
		*M_bp = pinfop->rpntr[rsb__recursive_middle_block_index(mBi)];
	else
		*M_bp = rsb__recursive_middle_block_index(pinfop->nr);
	if(pinfop->cpntr)
		*K_bp = pinfop->cpntr[rsb__recursive_middle_block_index(kBi)];
	else
		*K_bp = rsb__recursive_middle_block_index(pinfop->nc);
	return RSB_ERR_NO_ERROR;
}
2844 
rsb__recursive_split_point_parms_get(const struct rsb_mtx_partitioning_info_t * pinfop,rsb_coo_idx_t * moff,rsb_coo_idx_t * koff)2845 rsb_err_t rsb__recursive_split_point_parms_get(
2846 		const struct rsb_mtx_partitioning_info_t * pinfop,
2847 		rsb_coo_idx_t * moff, rsb_coo_idx_t * koff)
2848 {
2849 	/*!
2850 		\ingroup gr_internals
2851 
2852 		FIXME: this is function is OBSOLETE
2853 	*/
2854 	rsb_err_t errval = RSB_ERR_NO_ERROR;
2855 
2856 	if(!pinfop || !pinfop->rpntr || !pinfop->cpntr)
2857 	{
2858 		errval = RSB_ERR_BADARGS;
2859 	}
2860 	RSB_DEBUG_ASSERT(moff);
2861 	RSB_DEBUG_ASSERT(koff);
2862 	RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(pinfop->M_b));
2863 	RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(pinfop->K_b));
2864 
2865 	*moff = pinfop->rpntr[ rsb__recursive_middle_block_index(pinfop->M_b) ];
2866 	*koff = pinfop->cpntr[ rsb__recursive_middle_block_index(pinfop->K_b) ];
2867 
2868 	RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(*moff));
2869 	RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(*koff));
2870 
2871 	RSB_DO_ERR_RETURN(errval)
2872 }
2873 
rsb__terminal_recursive_matrix_count(const struct rsb_mtx_t * mtxAp)2874 rsb_long_t rsb__terminal_recursive_matrix_count(const struct rsb_mtx_t *mtxAp)
2875 {
2876 	/*!
2877 	 * \ingroup gr_internals
2878 	 * \return the count of leaf (terminal) matrices
2879 	 *
2880 	 * TODO : change this function type!
2881 	 */
2882 	rsb_submatrix_idx_t i,j;
2883 	struct rsb_mtx_t * submatrix = NULL;
2884 	rsb_long_t smc = 0;
2885 
2886 	if(!mtxAp)
2887 	{smc = 0;goto done;}
2888 
2889 	if(rsb__is_terminal_recursive_matrix(mtxAp))
2890 	{smc = 1;goto done;}
2891 
2892 	RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
2893 	if(submatrix)
2894 		smc += rsb__terminal_recursive_matrix_count(submatrix);
2895 done:
2896 	return smc;
2897 }
2898 
rsb__do_compute_terminal_nnz_min_max_avg_count(const struct rsb_mtx_t * mtxAp,rsb_nnz_idx_t * minnz,rsb_nnz_idx_t * maxnz,rsb_nnz_idx_t * avgnz)2899 rsb_err_t rsb__do_compute_terminal_nnz_min_max_avg_count(const struct rsb_mtx_t *mtxAp, rsb_nnz_idx_t * minnz, rsb_nnz_idx_t * maxnz, rsb_nnz_idx_t * avgnz)
2900 {
2901 //	struct rsb_mtx_t * submatrix = NULL;
2902 	rsb_err_t errval = RSB_ERR_NO_ERROR;
2903 	if(minnz)
2904 		*minnz = RSB_MAX_MATRIX_NNZ;
2905 	if(maxnz)
2906 		*maxnz = 0;
2907 	if(avgnz)
2908 		*avgnz = 0;
2909 	errval = rsb__do_compute_terminal_nnz_min_max_count(mtxAp,minnz,maxnz);
2910 	if(avgnz)
2911 		*avgnz = mtxAp->nnz/rsb__terminal_recursive_matrix_count(mtxAp);
2912 	RSB_DO_ERR_RETURN(errval)
2913 }
2914 
rsb__do_compute_terminal_nnz_min_max_count(const struct rsb_mtx_t * mtxAp,rsb_nnz_idx_t * minnz,rsb_nnz_idx_t * maxnz)2915 rsb_err_t rsb__do_compute_terminal_nnz_min_max_count(const struct rsb_mtx_t *mtxAp, rsb_nnz_idx_t * minnz, rsb_nnz_idx_t * maxnz)
2916 {
2917 	/*!
2918 	 * \ingroup gr_internals
2919 	 * \return ...
2920 	 */
2921 	rsb_submatrix_idx_t i,j;
2922 	struct rsb_mtx_t * submatrix = NULL;
2923 	rsb_err_t errval = RSB_ERR_NO_ERROR;
2924 
2925 	if(!mtxAp)
2926 	{
2927 		RSB_ERROR(RSB_ERRM_ES);
2928 		errval = RSB_ERR_BADARGS;
2929 	}
2930 
2931 	if(rsb__is_terminal_recursive_matrix(mtxAp))
2932 	{
2933 		if(minnz)
2934 //			RSB_STDOUT_MATRIX_SUMMARY(mtxAp), RSB_INFO(" <- MIN (from %d)\n",*minnz),
2935 			*minnz = RSB_MIN(*minnz,mtxAp->nnz);
2936 		if(maxnz)
2937 //			RSB_STDOUT_MATRIX_SUMMARY(mtxAp), RSB_INFO(" <- MAX (from %d)\n",*maxnz),
2938 			*maxnz = RSB_MAX(*maxnz,mtxAp->nnz);
2939 	}
2940 	else
2941 	{
2942 		RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
2943 		if(submatrix)
2944 			RSB_DO_ERROR_CUMULATE(errval,rsb__do_compute_terminal_nnz_min_max_count(submatrix,minnz,maxnz));
2945 	}
2946 //done:
2947 	RSB_DO_ERR_RETURN(errval)
2948 }
2949 
rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(const struct rsb_mtx_t * mtxAp,rsb_fmt_t matrix_storage,rsb_flags_t flags)2950 rsb_long_t rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(const struct rsb_mtx_t *mtxAp, rsb_fmt_t matrix_storage, rsb_flags_t flags)
2951 {
2952 	/*!
2953 	 * \ingroup gr_internals
2954 	 * \return the count of leaf (terminal) matrices
2955 	 *
2956 	 * TODO : change this function type!
2957 	 */
2958 	rsb_submatrix_idx_t i,j;
2959 	struct rsb_mtx_t * submatrix = NULL;
2960 	rsb_long_t smc = 0;
2961 
2962 	if(!mtxAp)
2963 	{smc=0;goto done;}
2964 
2965 	if(rsb__is_terminal_recursive_matrix(mtxAp))
2966 	{
2967 		if((!RSB_DO_FLAG_HAS(mtxAp->flags,flags)) && (mtxAp->matrix_storage==matrix_storage))
2968 			smc = 1;
2969 		goto done;
2970 	}
2971 
2972 	RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
2973 	if(submatrix)
2974 		smc += rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(submatrix,matrix_storage,flags);
2975 done:
2976 	return smc;
2977 }
2978 
rsb__terminal_recursive_matrix_count_with_storage_and_flags(const struct rsb_mtx_t * mtxAp,rsb_fmt_t matrix_storage,rsb_flags_t flags)2979 rsb_long_t rsb__terminal_recursive_matrix_count_with_storage_and_flags(const struct rsb_mtx_t *mtxAp, rsb_fmt_t matrix_storage, rsb_flags_t flags)
2980 {
2981 	/*!
2982 	 * \ingroup gr_internals
2983 	 * \return the count of leaf (terminal) matrices
2984 	 *
2985 	 * TODO : change this function type!
2986 	 */
2987 	rsb_submatrix_idx_t i,j;
2988 	struct rsb_mtx_t * submatrix = NULL;
2989 	rsb_long_t smc = 0;
2990 
2991 	if(!mtxAp)
2992 	{smc = 0;goto done;}
2993 
2994 	if(rsb__is_terminal_recursive_matrix(mtxAp))
2995 	{
2996 		if(RSB_DO_FLAG_HAS(mtxAp->flags,flags) && (mtxAp->matrix_storage==matrix_storage))
2997 			smc = 1;
2998 		goto done;
2999 	}
3000 
3001 	RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
3002 	if(submatrix)
3003 		smc += rsb__terminal_recursive_matrix_count_with_storage_and_flags(submatrix,matrix_storage,flags);
3004 done:
3005 	return smc;
3006 }
3007 
rsb__terminal_recursive_matrix_count_with_flags_but(const struct rsb_mtx_t * mtxAp,rsb_flags_t flags,rsb_flags_t nflags)3008 rsb_long_t rsb__terminal_recursive_matrix_count_with_flags_but(const struct rsb_mtx_t *mtxAp, rsb_flags_t flags, rsb_flags_t nflags)
3009 {
3010 	/*!
3011 	 * \ingroup gr_internals
3012 	 * \return the count of leaf (terminal) matrices
3013 	 *
3014 	 * TODO : change this function type!
3015 	 */
3016 	rsb_submatrix_idx_t i,j;
3017 	struct rsb_mtx_t * submatrix = NULL;
3018 	rsb_long_t smc = 0;
3019 
3020 	if(!mtxAp)
3021 	{smc = 0;goto done;}
3022 
3023 	if(rsb__is_terminal_recursive_matrix(mtxAp))
3024 	{
3025 		if(RSB_DO_FLAG_HAS(mtxAp->flags,flags) && !RSB_DO_FLAG_HAS(mtxAp->flags,nflags))
3026 			smc = 1;
3027 		goto done;
3028 	}
3029 
3030 	RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
3031 	if(submatrix)
3032 		smc += rsb__terminal_recursive_matrix_count_with_flags_but(submatrix,flags,nflags);
3033 done:
3034 	return smc;
3035 }
3036 
rsb__terminal_recursive_matrix_count_with_flags(const struct rsb_mtx_t * mtxAp,rsb_flags_t flags)3037 rsb_long_t rsb__terminal_recursive_matrix_count_with_flags(const struct rsb_mtx_t *mtxAp, rsb_flags_t flags)
3038 {
3039 	/*!
3040 	 * \ingroup gr_internals
3041 	 * \return the count of leaf (terminal) matrices
3042 	 *
3043 	 * TODO : change this function type!
3044 	 */
3045 	rsb_submatrix_idx_t i,j;
3046 	struct rsb_mtx_t * submatrix = NULL;
3047 	rsb_long_t smc = 0;
3048 
3049 	if(!mtxAp)
3050 	{smc = 0;goto done;}
3051 
3052 	if(rsb__is_terminal_recursive_matrix(mtxAp))
3053 	{
3054 		if(RSB_DO_FLAG_HAS(mtxAp->flags,flags))
3055 			smc = 1;
3056 		goto done;
3057 	}
3058 
3059 	RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
3060 	if(submatrix)
3061 		smc += rsb__terminal_recursive_matrix_count_with_flags(submatrix,flags);
3062 done:
3063 	return smc;
3064 }
3065 
rsb__do_transposition_from_char(rsb_char_t tc)3066 rsb_trans_t rsb__do_transposition_from_char(rsb_char_t tc)
3067 {
3068 	/*!
3069 	 * \ingroup gr_internals
3070 	 * FIXME : document this
3071 	 */
3072 	if(tolower(tc)=='t')
3073 		return RSB_TRANSPOSITION_T;
3074 	else
3075 	if(tolower(tc)=='n')
3076 		return RSB_TRANSPOSITION_N;
3077 	else
3078 	if(tolower(tc)=='c')
3079 		return RSB_TRANSPOSITION_C;
3080 	else
3081 		return RSB_INVALID_TRANS;
3082 }
3083 
rsb_trans_t rsb__do_transpose_transposition(rsb_trans_t transA)
{
	/*!
	 * \ingroup gr_internals
	 * Returns the transposition obtained by transposing once more:
	 * N -> T, while T and C both fold back to N; anything else is
	 * returned unchanged.
	 */
	if(RSB_DOES_NOT_TRANSPOSE(transA))
		return RSB_TRANSPOSITION_T;
	if(transA == RSB_TRANSPOSITION_T || transA == RSB_TRANSPOSITION_C)
		return RSB_TRANSPOSITION_N;
	return transA;	/* unrecognized code: leave untouched */
}
3098 
rsb_spmm_inner(const struct rsb_mtx_t * mtxAp,const void * mrhs,void * mout,rsb_int_t bstride,rsb_int_t cstride,rsb_int_t nrhs,rsb_trans_t transA)3099 rsb_err_t rsb_spmm_inner(const struct rsb_mtx_t * mtxAp, const void * mrhs, void *mout, rsb_int_t bstride, rsb_int_t cstride, rsb_int_t nrhs, rsb_trans_t transA)
3100 {
3101 #ifdef RSB_HAVE_OPTYPE_SPMM_AZ
3102 	/*!
3103 	 * \ingroup gr_internals
3104 	 * fixme */
3105 
3106 	size_t el_size = mtxAp->el_size;
3107 
3108 	if( rsb__is_recursive_matrix(mtxAp->flags))
3109 	{
3110 		rsb_submatrix_idx_t i,j;
3111 		struct rsb_mtx_t * submatrix = NULL;
3112 		rsb_coo_idx_t mB = (mtxAp->rpntr[rsb__recursive_middle_block_index(mtxAp->M_b)]);
3113 		rsb_coo_idx_t kB = (mtxAp->cpntr[rsb__recursive_middle_block_index(mtxAp->K_b)]);
3114 
3115 		RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
3116 		if(submatrix)
3117 		{
3118 			rsb_coo_idx_t moff,koff;
3119 
3120 			moff = i*mB;
3121 			koff = j*kB;
3122 
3123 			rsb_spmm_inner(submatrix,((rsb_byte_t*)mrhs)+koff*el_size,((rsb_byte_t*)mout)+moff*el_size,bstride,cstride,nrhs,transA);
3124 		}
3125 		return RSB_ERR_NO_ERROR;
3126 	}
3127 	else
3128 		return rsb__do_spmm_az(mtxAp,mrhs,mout,bstride,cstride,nrhs,transA);/*FIXME*/
3129 #else /* RSB_HAVE_OPTYPE_SPMM_AZ */
3130 	return RSB_ERR_UNSUPPORTED_OPERATION;
3131 #endif /* RSB_HAVE_OPTYPE_SPMM_AZ */
3132 }
3133 
rsb_flags_t rsb__do_flip_uplo_flags(rsb_flags_t flags)
{
	/**
	 * \ingroup gr_internals
	 * Swaps the UPPER/LOWER triangle flags: UPPER becomes LOWER, else
	 * LOWER becomes UPPER; flags without either bit pass through.
	 */
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_UPPER))
	{
		RSB_DO_FLAG_SUBST(flags,RSB_FLAG_UPPER,RSB_FLAG_LOWER);
		return flags;
	}
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_LOWER))
		RSB_DO_FLAG_SUBST(flags,RSB_FLAG_LOWER,RSB_FLAG_UPPER);
	return flags;
}
3146 
rsb_flags_t rsb__do_detect_and_add_triangular_flags(rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_flags_t flags)
{
	/**
	 * \ingroup gr_internals
	 * Scans the coordinates and adds RSB_FLAG_LOWER / RSB_FLAG_UPPER for
	 * any strictly sub-/super-diagonal entry found; adds
	 * RSB_FLAG_TRIANGULAR when not both triangles are populated.
	 * \return the (possibly augmented) flags; input flags unchanged on bad args.
	 */
	rsb_nnz_idx_t i;
	if(!IA || !JA || RSB_INVALID_NNZ_INDEX(nnz))
		return flags;
	for(i=0;i<nnz;++i)
	{
		if(IA[i]>JA[i])
			RSB_DO_FLAG_ADD(flags,RSB_FLAG_LOWER);
		else
		if(IA[i]<JA[i])
			RSB_DO_FLAG_ADD(flags,RSB_FLAG_UPPER);
		/* once both triangles are seen, further entries cannot change
		 * the outcome (flag adds are idempotent): stop scanning */
		if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_UPPER) && RSB_DO_FLAG_HAS(flags,RSB_FLAG_LOWER))
			break;
	}
	if(RSB_NAND(RSB_DO_FLAG_HAS(flags,RSB_FLAG_UPPER),RSB_DO_FLAG_HAS(flags,RSB_FLAG_LOWER)))
		RSB_DO_FLAG_ADD(flags,RSB_FLAG_TRIANGULAR);

	/* it could be the case that both RSB_FLAG_LOWER and RSB_FLAG_UPPER flags get caught */
	return flags;
}
3171 
rsb_err_t rsb__do_load_matrix_file_as_matrix_market(struct rsb_mtx_t ** mtxApp, const rsb_char_t * filename, rsb_flags_t flags, rsb_type_t typecode)
{
	/*!
	 * Loads a Matrix Market file into a newly built rsb_mtx_t, storing
	 * the result in *mtxApp.  Symmetry/hermitian/triangle information
	 * from the file header is folded into the construction flags.
	 * On error the COO arrays allocated by the loader are freed here.
	 *
	 * FIXME: and typecode check ?
	 * FIXME: UNFINISHED
	 */
	/** \ingroup gr_unfinished */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	struct rsb_mtx_t * mtxAp = NULL;
	void *VA = NULL;
	rsb_coo_idx_t *IA = NULL, *JA = NULL;

	if(!mtxApp)
	{
		RSB_ERROR(RSB_ERRM_E_MTXAP);
		errval = RSB_ERR_BADARGS;
	}
	else
	if(!filename)
	{
		RSB_ERROR(RSB_ERRM_NPSFF);
		errval = RSB_ERR_BADARGS;
	}
	else
	{
		rsb_coo_idx_t m = 0,k = 0;
		rsb_nnz_idx_t nnz = 0;
#define RSB_20120309_FIX 1
#if RSB_20120309_FIX
		rsb_bool_t is_symmetric = RSB_BOOL_FALSE;
		rsb_bool_t is_hermitian = RSB_BOOL_FALSE;
		rsb_bool_t is_lower = RSB_BOOL_FALSE;
		rsb_bool_t is_upper = RSB_BOOL_FALSE;
		rsb_bool_t is_vector = RSB_BOOL_FALSE;
		/* FIXME: shall update test_matops.c accordingly. */
		/* FIXME: need limits checks! */
		/* probe the header only: dimensions, nnz and structure hints;
		 * a dense vector file is not acceptable here */
		if(RSB_SOME_ERROR(rsb__util_mm_info_matrix_f(filename,&m,&k,&nnz,NULL,&is_symmetric,&is_hermitian,NULL,&is_lower,&is_upper,&is_vector) ) || is_vector )
		{
			errval = RSB_ERR_BADARGS;
			RSB_PERR_GOTO(err,RSB_ERRMSG_NOTMTXMKT" : %s ..\n",filename);
		}
		else
		{
			/* propagate the header's structural hints into the flags */
			if( is_symmetric == RSB_BOOL_TRUE ) RSB_DO_FLAG_ADD(flags,RSB_FLAG_SYMMETRIC);
			if( is_hermitian == RSB_BOOL_TRUE ) RSB_DO_FLAG_ADD(flags,RSB_FLAG_HERMITIAN);
			if(is_upper) RSB_DO_FLAG_ADD(flags,RSB_FLAG_UPPER);
			if(is_lower) RSB_DO_FLAG_ADD(flags,RSB_FLAG_LOWER);
		}

		if( m==k && m>1 /* && want_only_lowtri*/ )
			nnz += m;	/* the loading routine shall allocate nnz+m */
		else
 			nnz = 0;	/* the loading routine should determine nnz */
#endif /* RSB_20120309_FIX */
		if((errval = rsb__util_mm_load_matrix_f(filename,&IA,&JA,&VA,&m,&k,&nnz,typecode,flags,NULL,NULL))!=RSB_ERR_NO_ERROR)
		{
			RSB_ERROR(RSB_ERRM_ES);
			rsb__do_perror(NULL,errval);
			goto err;
		}
		/* build in place: the matrix takes over VA/IA/JA on success */
		if((mtxAp = rsb__do_mtx_alloc_from_coo_inplace(VA,IA,JA,nnz,typecode,m,k,RSB_DEFAULT_ROW_BLOCKING,RSB_DEFAULT_COL_BLOCKING,flags,&errval))==NULL)
		{
			RSB_PERR_GOTO(err,RSB_ERRM_ES);
			// FIXME: incomplete error handling
		}
		if(mtxAp)
		{
			rsb_submatrix_idx_t smi;
			struct rsb_mtx_t * submatrix;

			/* arrays were allocated here, not by the caller: clear the
			 * "externally allocated" marker so destruction frees them */
			RSB_SUBMATRIX_FOREACH_LEAF(mtxAp,submatrix,smi)
				RSB_DO_FLAG_DEL(submatrix->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS);
			RSB_DO_FLAG_DEL(mtxAp->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS);
		}
		*mtxApp = mtxAp;
		goto ok;
	}
err:
	RSB_CONDITIONAL_FREE(IA);
	RSB_CONDITIONAL_FREE(JA);
	RSB_CONDITIONAL_FREE(VA);
ok:
	RSB_DO_ERR_RETURN(errval)
}
3256 
rsb_bool_t rsb__are_coo_matrices_equal(const struct rsb_coo_matrix_t *cm1, const struct rsb_coo_matrix_t *cm2)
{
	/* this is a debug routine and may print internal stuff */
	/* Compares dimensions, nnz, typecode and (where both sides provide
	 * them) the IA/JA/VA arrays; an array present on only one side is
	 * skipped, not treated as a difference. */
	const rsb_bool_t no = RSB_BOOL_FALSE;
	const rsb_bool_t yes = RSB_BOOL_TRUE;
	rsb_nnz_idx_t nnz;
#if (RSB_WANT_VERBOSE_MESSAGES || 1)
#define	RSB_GOTO_DIFFERING	{ RSB_PERR_GOTO(differing,RSB_ERRM_ES);}
#else /* RSB_WANT_VERBOSE_MESSAGES */
#define	RSB_GOTO_DIFFERING	{goto differing;}
#endif /* RSB_WANT_VERBOSE_MESSAGES */
	if( cm1 ==  cm2)
		goto equal;	/* same object (or both NULL) */
	if(!cm1 || !cm2)
		RSB_GOTO_DIFFERING
	nnz = cm1->nnz;
	if( RSB_INVALID_NNZ_INDEX(nnz) )
		RSB_GOTO_DIFFERING
	if(cm1->nr!= cm2->nr)
		RSB_GOTO_DIFFERING
	if(cm1->nc!= cm2->nc)
		RSB_GOTO_DIFFERING
	if(cm1->nnz!= cm2->nnz)
		RSB_GOTO_DIFFERING
	if(cm1->typecode!= cm2->typecode)
		RSB_GOTO_DIFFERING
	//if((!cm1->IA)&&(!cm2->IA)) return no;
	//if((!cm1->IA)||(!cm2->IA)) return no;
	//else
		if((cm1->IA)&&(cm2->IA))
		if(RSB_MEMCMP(cm1->IA,cm2->IA,sizeof(rsb_coo_idx_t)*nnz))
			RSB_GOTO_DIFFERING
	//if((!cm1->JA)&&(!cm2->JA)) return no;
	//if((!cm1->JA)||(!cm2->JA)) return no;
	//else
		if((cm1->JA)&&(cm2->JA))
		if(RSB_MEMCMP(cm1->JA,cm2->JA,sizeof(rsb_coo_idx_t)*nnz))
			RSB_GOTO_DIFFERING
	//if((!cm1->VA)&&(!cm2->VA)) return no;
	//if((!cm1->VA)||(!cm2->VA)) return no;
	//else
		if((cm1->VA)&&(cm2->VA))
		{
#if 1
			/* NOTE(review): branches to 'differing' when
			 * rsb__do_are_same() returns nonzero — implies nonzero
			 * means "not same" (an error code); confirm against its
			 * definition. Unlike memcmp, this treats -0.0 == 0.0. */
			if(rsb__do_are_same(cm1->VA,cm2->VA, nnz,cm1->typecode, 1, 1))
				RSB_GOTO_DIFFERING
#else
			if(RSB_MEMCMP(cm1->VA,cm2->VA,RSB_SIZEOF(cm1->typecode)*nnz)) /* This is too strict: for it, -0.0 != 0.0 */
				RSB_GOTO_DIFFERING
#endif
		}


equal:
	return yes;
differing:
#if RSB_ALLOW_STDOUT
#if (RSB_WANT_VERBOSE_MESSAGES || 1)
	if(cm1)RSB_STDOUT_COO_MATRIX_SUMMARY(cm1);
	if(cm2)RSB_STDOUT_COO_MATRIX_SUMMARY(cm2);
#endif /* RSB_WANT_VERBOSE_MESSAGES */
#endif /* RSB_ALLOW_STDOUT */
	return no;
#undef	RSB_GOTO_DIFFERING
}
3322 
rsb__is_coo_matrix_empty(const struct rsb_coo_matrix_t * cm,rsb_flags_t flags)3323 static rsb_bool_t rsb__is_coo_matrix_empty(const struct rsb_coo_matrix_t *cm, rsb_flags_t flags)
3324 {
3325 	if(!cm)
3326 		return RSB_BOOL_FALSE;
3327 	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_UNIT_DIAG_IMPLICIT))
3328 		return RSB_BOOL_FALSE;
3329 	if(cm->nnz==0)
3330 		return RSB_BOOL_TRUE;
3331 	return (rsb_check_for_nonzeros(cm->VA,cm->nnz,cm->typecode)==0)?RSB_BOOL_TRUE:RSB_BOOL_FALSE;
3332 }
3333 
rsb_bool_t rsb__are_coo_matrices_both_empty(const struct rsb_coo_matrix_t *cm1, rsb_flags_t flags1, const struct rsb_coo_matrix_t *cm2, rsb_flags_t flags2)
{
	/* NOTE(review): despite the name ("both"), the result combines the
	 * two emptiness checks with RSB_BOOL_OR, i.e. it is true when EITHER
	 * matrix is empty — confirm whether AND was intended; callers (e.g.
	 * rsb__are_coo_matrices_equal_or_both_empty) may depend on the
	 * current behavior. */
	rsb_bool_t im1e = RSB_BOOL_FALSE,im2e = RSB_BOOL_FALSE,abme = RSB_BOOL_FALSE;
	im1e = rsb__is_coo_matrix_empty(cm1,flags1);
	im2e = rsb__is_coo_matrix_empty(cm2,flags2);
	abme = RSB_BOOL_OR(im1e,im2e);
	return abme;
}
3342 
rsb__are_coo_matrices_equal_or_both_empty(const struct rsb_coo_matrix_t * cm1,rsb_flags_t flags1,const struct rsb_coo_matrix_t * cm2,rsb_flags_t flags2)3343 rsb_bool_t rsb__are_coo_matrices_equal_or_both_empty(const struct rsb_coo_matrix_t *cm1, rsb_flags_t flags1, const struct rsb_coo_matrix_t *cm2, rsb_flags_t flags2)
3344 {
3345 	rsb_bool_t acme = rsb__are_coo_matrices_equal(cm1,cm2);
3346 	if(acme)
3347 		;
3348 	else
3349 		acme = rsb__are_coo_matrices_both_empty(cm1,flags1,cm2,flags2);
3350 	return acme;
3351 }
3352 
rsb__get_row_dense(const struct rsb_mtx_t * mtxAp,void * row,rsb_coo_idx_t i)3353 rsb_err_t rsb__get_row_dense(const struct rsb_mtx_t * mtxAp, void* row, rsb_coo_idx_t i )
3354 {
3355 	/*!
3356 	 * \ingroup gr_mops
3357 	 * Will write entire row i of matrix matrix in the row vector.
3358 	 *
3359 	 * \param i the specified row
3360 	 * \param row an already allocated vector of the same type as the matrix
3361 	 * \param mtxAp is a valid pointer to a rsb_mtx_t structure
3362 	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3363 	 *
3364 	 * FIXME: this function is unfinished.
3365 	 * */
3366 	if(!mtxAp)
3367 		return RSB_ERR_BADARGS;
3368 
3369 	RSB_BZERO( row , mtxAp->el_size * mtxAp->nc);
3370 
3371 	return rsb__do_get_row_dense(mtxAp, row, i );
3372 }
3373 
rsb_spmv_unua(const struct rsb_mtx_t * mtxAp,const void * x,void * y,rsb_trans_t transA)3374 rsb_err_t rsb_spmv_unua(const struct rsb_mtx_t * mtxAp, const void * x, void * y, rsb_trans_t transA)
3375 {
3376 	/*!
3377 	 * \ingroup gr_mops
3378 	 * computes \f$y \leftarrow y - op(A) \cdot x \f$
3379 	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3380 	 *
3381 	 * */
3382 	rsb_aligned_t mone[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];
3383 	rsb__util_set_area_to_converted_integer(&mone[0],mtxAp->typecode,-1);
3384 	return rsb__do_spmv_general(transA,&mone[0],mtxAp,x,1,NULL,y,1,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
3385 }
3386 
rsb_spmv_uaua(const struct rsb_mtx_t * mtxAp,const void * rhs,void * out,rsb_trans_t transA)3387 rsb_err_t rsb_spmv_uaua(const struct rsb_mtx_t * mtxAp, const void * rhs, void * out, rsb_trans_t transA)
3388 {
3389 	/*!
3390 	 * \ingroup gr_mops
3391 	 * computes \f$y \leftarrow y + op(A) \cdot x \f$
3392 	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3393 	 *
3394 	 * */
3395 	rsb_err_t errval = RSB_ERR_NO_ERROR;
3396 	errval = rsb__do_spmv_general(transA,NULL,mtxAp,rhs,1,NULL,out,1,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
3397 	RSB_DO_ERR_RETURN(errval)
3398 }
3399 
rsb_spmv_uauz(const struct rsb_mtx_t * mtxAp,const void * rhs,void * out,rsb_trans_t transA)3400 rsb_err_t rsb_spmv_uauz(const struct rsb_mtx_t * mtxAp, const void * rhs, void * out, rsb_trans_t transA)
3401 {
3402 	/*!
3403 	 * \ingroup gr_mops
3404 	 * computes \f$y \leftarrow op(A) \cdot x \f$
3405 	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3406 	 * */
3407 
3408 	/* FIXME : TEMPORARY */
3409 	rsb_aligned_t zero[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];
3410 
3411 	if(!mtxAp)
3412 		return RSB_ERR_BADARGS;
3413 	rsb__util_set_area_to_converted_integer(&zero[0],mtxAp->typecode,0);
3414 	return rsb__do_spmv_general(transA,NULL,mtxAp,rhs,1,&zero[0],out,1,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
3415 }
3416 
#if 0
/* disabled: scaled variant, kept for reference (never compiled) */
static rsb_err_t rsb_spmv_sxsx(const struct rsb_mtx_t * mtxAp, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/*!
	 * \ingroup gr_mops
	 * computes \f$y \leftarrow \beta \cdot y + \alpha\cdot A\cdot x\f$
	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
	 */
	if(!alphap || !betap)
		return RSB_ERR_BADARGS;
	return rsb__do_spmv_general(transA,alphap,mtxAp,x,incx,betap,y,incy,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
}
#endif
3430 
3431 
3432 
struct rsb_mtx_t * rsb__load_matrix_file_as_binary(const rsb_char_t * filename, rsb_err_t *errvalp)
{
	/*!
	 * Loads a matrix from a binary dump file.
	 * \return the loaded matrix, or NULL on failure (status in *errvalp).
	 * Note: a NULL errvalp is treated as a bad argument.
	 */
	struct rsb_mtx_t *mtxAp = NULL;
	rsb_err_t errval = RSB_ERR_BADARGS;

	if(errvalp && filename)
		errval = rsb__do_load_matrix_file_as_binary(&mtxAp,filename);
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3449 
rsb__do_spsm(rsb_trans_t transT,const void * alphap,const struct rsb_mtx_t * mtxAp,rsb_coo_idx_t nrhs,rsb_flags_t order,const void * betap,const void * b,rsb_nnz_idx_t ldb,void * c,rsb_nnz_idx_t ldc)3450 rsb_err_t rsb__do_spsm(rsb_trans_t transT, const void * alphap, const struct rsb_mtx_t * mtxAp, rsb_coo_idx_t nrhs, rsb_flags_t order, const void * betap, const void * b, rsb_nnz_idx_t ldb, void * c, rsb_nnz_idx_t ldc)
3451 {
3452 	/*!
3453 	 * \ingroup gr_internals
3454 	 * When x is a multivector with nrhs elements, b elements having stride ldb and c elements having stride ldc
3455 	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3456 	 * */
3457 	 /* FIXME : and error detection ? **/
3458 	 /* FIXME : UNTESTED, UNFINISHED **/
3459 	rsb_coo_idx_t l = 0;
3460 	rsb_err_t errval = RSB_ERR_NO_ERROR;
3461 
3462 #if RSB_ALLOW_ZERO_DIM
3463 	if(RSB_ANY_MTX_DIM_ZERO(mtxAp))
3464 	{
3465 		goto err; /* FIXME: skipping further checks */
3466 	}
3467 #endif
3468 
3469 	if(!mtxAp || !b || !ldb || !c || !ldc || !nrhs /*  transA*/ || !alphap || !betap ||
3470 		( order != RSB_FLAG_WANT_COLUMN_MAJOR_ORDER && order != RSB_FLAG_WANT_ROW_MAJOR_ORDER ) )
3471 	{
3472 		errval = RSB_ERR_BADARGS;
3473 		goto err;
3474 	}
3475 
3476 
3477 	if( 0
3478 #if RSB_ENABLE_INNER_NRHS_SPSV
3479 		|| ( rsb_global_session_handle.want_outer_spmm==0 )
3480 #endif
3481 	  ) /* 0 == yes TODO: need want_outer_spsm here */
3482 	{
3483 		size_t outnri = ldc, rhsnri = ldb;
3484 
3485 		if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
3486 			RSB_DO_ERROR_CUMULATE(errval,rsb__do_spsv_general(transT,alphap,mtxAp,b,   1,c,   1,RSB_OP_FLAG_DEFAULT RSB_INNER_NRHS_SPSV_ARGS_IDS));
3487 		else
3488 			RSB_DO_ERROR_CUMULATE(errval,rsb__do_spsv_general(transT,alphap,mtxAp,b,nrhs,c,nrhs,RSB_OP_FLAG_DEFAULT RSB_INNER_NRHS_SPSV_ARGS_IDS));
3489 	}
3490 	else
3491 	{
3492 		if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
3493 			/*  column major */
3494 			for(l=0;l<nrhs;++l)
3495 				RSB_DO_ERROR_CUMULATE(errval,rsb__do_spsv(transT,alphap,mtxAp, ((const rsb_byte_t*)b)+(mtxAp->el_size*ldb)*l,1, ((rsb_byte_t*)c)+(mtxAp->el_size*ldc)*l,1));
3496 		else
3497 			/*  row major */
3498 			for(l=0;l<nrhs;++l)
3499 				RSB_DO_ERROR_CUMULATE(errval,rsb__do_spsv(transT,alphap,mtxAp, ((const rsb_byte_t*)b)+(mtxAp->el_size+l),ldb, ((rsb_byte_t*)c)+(mtxAp->el_size+l),ldc));
3500 	}
3501 err:
3502 	RSB_DO_ERR_RETURN(errval)
3503 }
3504 
rsb_err_t rsb__util_sort_row_major_buffered(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t m, rsb_coo_idx_t k,  rsb_type_t typecode , rsb_flags_t flags, void * WA, size_t wb )
{
	/*!

	   Will sort as CSR (or CSC) the given coefficients.
	   Will ignore any fancy sorting flags.

	   \param \rsb_wr_va_ia_ja_desc_msg
	   \param \rsb_flags_inp_param_msg
	   \param \rsb_nnz_inp_param_msg
	   \param \rsb_nrows_inp_param_msg
	   \param \rsb_ncols_inp_param_msg
	   \param \rsb_type_param_msg
	   \param WA optional work area buffer of wb bytes, forwarded to the sorter
	   \return \rsberrcodemsg
	*/
	// FIXME : should handle error conditions
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	//struct rsb_mtx_partitioning_info_t pinfop;

	if(RSB_MATRIX_UNSUPPORTED_TYPE(typecode))
	{
		errval = RSB_ERR_UNSUPPORTED_TYPE;goto err;
	}
#if 0
	pinfop.rpntr = rsb__util_get_partitioning_array( 1, m , &pinfop.M_b, flags),
	pinfop.cpntr = rsb__util_get_partitioning_array( 1, k , &pinfop.K_b, flags),
	rsb__pinfo_init( &pinfop, pinfop.M_b, pinfop.K_b, pinfop.rpntr, pinfop.cpntr, m,k);/* FIXME : is this ok ? */
	errval = rsb__do_util_sortcoo(VA,IA,JA,m,k,nnz,typecode,&pinfop,flags,NULL,0);
	RSB_CONDITIONAL_FREE(pinfop.rpntr);
	RSB_CONDITIONAL_FREE(pinfop.cpntr);
#else
	//Remove internal support for RSB_FLAG_EXPERIMENTAL_IN_PLACE_PERMUTATION_SORT):
	//RSB_DO_FLAG_SUBST(flags,RSB_INTERNAL_FLAG_CSR_SORTING_MASK,RSB_FLAG_WANT_BCSS_STORAGE|RSB_FLAG_EXPERIMENTAL_IN_PLACE_PERMUTATION_SORT);
	/* force plain CSR-style sorting regardless of any fancy sort flags passed in */
	RSB_DO_FLAG_SUBST(flags,RSB_INTERNAL_FLAG_CSR_SORTING_MASK,RSB_FLAG_WANT_BCSS_STORAGE);
	errval = rsb__do_util_sortcoo(VA,IA,JA,m,k,nnz,typecode,NULL,flags,WA,wb);
#endif
err:
	RSB_DO_ERR_RETURN(errval)
}
3544 
rsb__basename(const rsb_char_t * path)3545 const rsb_char_t *rsb__basename(const rsb_char_t *path)
3546 {
3547 	rsb_int_t sl;
3548 
3549 	if(!path)
3550 		return path;
3551 	sl = rsb__util_strlen(path);
3552 	while(sl>0 && path[sl-1]!=RSB_DIR_SEPARATOR)
3553 		--sl;
3554 	return path+sl;
3555 }
3556 
rsb__do_set_elements(struct rsb_mtx_t * mtxAp,const void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,rsb_nnz_idx_t nnz,rsb_flags_t flags)3557 rsb_err_t rsb__do_set_elements(struct rsb_mtx_t * mtxAp, const void * VA, const rsb_coo_idx_t *IA, const rsb_coo_idx_t *JA, rsb_nnz_idx_t nnz, rsb_flags_t flags)
3558 {
3559 	rsb_coo_idx_t k;
3560 	rsb_err_t errval = RSB_ERR_NO_ERROR;
3561 	rsb_nnz_idx_t ifo = ( flags & RSB_FLAG_FORTRAN_INDICES_INTERFACE )?1:0;
3562 
3563 	if(!IA || !VA || !JA || !mtxAp)
3564 	{
3565 		errval = RSB_ERR_BADARGS;
3566 		goto err;
3567 	}
3568 
3569 #if RSB_WANT_COO_BEGIN
3570 	if( RSB_MTX_HBDF( mtxAp) )
3571 	{
3572 		errval = rsb__BLAS_Xuscr_insert_entries(RSB_MTX_HBDFH(mtxAp),nnz,VA,IA,JA);
3573 		goto err;
3574 	}
3575 #endif /* RSB_WANT_COO_BEGIN */
3576 	for(k=0;k<nnz;++k)
3577 		errval |= rsb__do_upd_coo_element(mtxAp,((const rsb_char_t*)VA)+mtxAp->el_size*k,IA[k]-ifo,JA[k]-ifo,flags);
3578 err:
3579 	RSB_DO_ERR_RETURN(errval)
3580 }
3581 
rsb__set_ldX_for_spmm(rsb_trans_t transA,const struct rsb_mtx_t * mtxAp,rsb_coo_idx_t nrhs,rsb_flags_t order,rsb_nnz_idx_t * ldBp,rsb_nnz_idx_t * ldCp)3582 rsb_err_t rsb__set_ldX_for_spmm(rsb_trans_t transA, const struct rsb_mtx_t * mtxAp, rsb_coo_idx_t nrhs, rsb_flags_t order, rsb_nnz_idx_t * ldBp, rsb_nnz_idx_t * ldCp)
3583 {
3584 	if ( order == RSB_FLAG_WANT_ROW_MAJOR_ORDER )
3585 	{
3586 		if( !*ldBp )
3587 			*ldBp = nrhs;
3588 		if( !*ldCp )
3589 			*ldCp = nrhs;
3590 	}
3591 	else
3592 	if ( order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER )
3593 	{
3594 		if( !*ldBp )
3595 		{
3596 			if ( transA == RSB_TRANSPOSITION_N )
3597 				*ldBp = mtxAp->nc;
3598 			else
3599 				*ldBp = mtxAp->nr;
3600 		}
3601 		if( !*ldCp )
3602 		{
3603 			if ( transA == RSB_TRANSPOSITION_N )
3604 				*ldCp = mtxAp->nr;
3605 			else
3606 				*ldCp = mtxAp->nc;
3607 		}
3608 	}
3609 	else
3610 		goto err;
3611 
3612 	if( !(*ldBp) || (!*ldCp) )
3613 		goto err;
3614 
3615 	if ( order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER )
3616 	{
3617 		if( *ldCp < rsb__do_get_rows_of(mtxAp,transA) || *ldBp < rsb__do_get_columns_of(mtxAp,transA) )
3618 			RSB_PERR_GOTO(err,RSB_ERRM_ES);
3619 	}
3620 	else
3621 	if ( order == RSB_FLAG_WANT_ROW_MAJOR_ORDER )
3622 	{
3623 		if( *ldCp < nrhs || *ldBp < nrhs )
3624 			RSB_PERR_GOTO(err,RSB_ERRM_ES);
3625 	}
3626 
3627 	return RSB_ERR_NO_ERROR;
3628 err:
3629 	return RSB_ERR_BADARGS;
3630 }
3631 
rsb__do_spmm(rsb_trans_t transA,const void * alphap,const struct rsb_mtx_t * mtxAp,rsb_coo_idx_t nrhs,rsb_flags_t order,const void * b,rsb_nnz_idx_t ldb,const void * betap,void * c,rsb_nnz_idx_t ldc,enum rsb_op_flags_t op_flags)3632 rsb_err_t rsb__do_spmm(rsb_trans_t transA, const void * alphap, const struct rsb_mtx_t * mtxAp, rsb_coo_idx_t nrhs, rsb_flags_t order, const void * b, rsb_nnz_idx_t ldb, const void * betap, void * c, rsb_nnz_idx_t ldc, enum rsb_op_flags_t op_flags)
3633 {
3634 	/*!
3635 	   \ingroup rsb_doc_matrix_operations
3636 
3637 	   FIXME: this function name is WRONG
3638 
3639 	   when x is a multivector with nrhs elements, b elements having stride ldb and c elements having stride ldc
3640 
3641 	   \return \rsberrcodemsg
3642 	 */
3643 	 /* FIXME : and error detection ?
3644 	  * e.g.:
3645 	    if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER && ldb<mtxAp->nr && transA=...)
3646 	  * **/
3647 	 /* TODO: incx,incy  **/
3648 	rsb_coo_idx_t l = 0;
3649 	rsb_err_t errval = RSB_ERR_NO_ERROR;
3650 	const rsb_coo_idx_t incx = 1,incy = 1;
3651 	const rsb_byte_t * bp = b;
3652        	rsb_byte_t * cp = c;
3653 	rsb_aligned_t pone[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];
3654 
3655 #if RSB_ALLOW_ZERO_DIM
3656 	if(RSB_ANY_MTX_DIM_ZERO(mtxAp))
3657 		goto err; /* FIXME: skipping checks on ldB, ldC, op_flags*/
3658 #endif
3659 
3660 	if(!mtxAp || !b || !c || !nrhs /*  transA*/ ||
3661 		( order != RSB_FLAG_WANT_COLUMN_MAJOR_ORDER && order != RSB_FLAG_WANT_ROW_MAJOR_ORDER ) )
3662 	{
3663 		errval = RSB_ERR_BADARGS;
3664 		goto err;
3665 	}
3666 
3667 	if(!alphap || !betap)
3668 	{
3669 		rsb__fill_with_ones(&pone[0],mtxAp->typecode,1,1);
3670 		if(!alphap)
3671 			alphap = &pone[0];
3672 		if(!betap)
3673 			betap = &pone[0];
3674 	}
3675 
3676 	errval = rsb__set_ldX_for_spmm(transA, mtxAp, nrhs, order, &ldb, &ldc);
3677 	if(RSB_SOME_ERROR(errval))
3678 	{
3679 		errval = RSB_ERR_BADARGS;
3680 		goto err;
3681 	}
3682 
3683 #ifdef RSB_HAVE_OPTYPE_SPMM_AZ
3684 	/*
3685 		return ...
3686 	 SPMM_AZ is not yet complete
3687 	 */
3688 #endif /* RSB_HAVE_OPTYPE_SPMM_AZ */
3689 	if( rsb_global_session_handle.want_outer_spmm==0 ) /* 0 == yes */
3690 	{
3691 		/* inner loop: fast */
3692 		if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
3693 			RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transA,alphap,mtxAp, bp,incx, betap, cp,incy,op_flags,nrhs,ldc, ldb  ));
3694 		else
3695 			RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transA,alphap,mtxAp, bp,ldb , betap, cp, ldc ,op_flags,nrhs,incx,incy));
3696 	}
3697 	else
3698 	{
3699 		/* outer loop: slow */
3700 		if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
3701 			for(l=0;l<nrhs;++l)
3702 				RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transA,alphap,mtxAp,bp+(mtxAp->el_size*ldb)*l,incx, betap, cp+(mtxAp->el_size*ldc)*l,incy,op_flags RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS));
3703 		else
3704 			for(l=0;l<nrhs;++l)
3705 				RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transA,alphap,mtxAp,bp+(mtxAp->el_size+l  )  ,ldb , betap, cp+(mtxAp->el_size+l  )  ,ldc ,op_flags RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS));
3706 	}
3707 err:
3708 	RSB_DO_ERR_RETURN(errval)
3709 
3710 }
3711 
rsb__do_spmm_general(const struct rsb_mtx_t * mtxAp,const void * b,void * c,const void * alphap,const void * betap,rsb_coo_idx_t incx,rsb_coo_idx_t incy,rsb_trans_t transA,enum rsb_op_flags_t op_flags,rsb_flags_t order,const rsb_int_t nrhs,const size_t outnri,const size_t rhsnri)3712 rsb_err_t rsb__do_spmm_general(const struct rsb_mtx_t * mtxAp, const void * b, void * c, const void *alphap, const void * betap, rsb_coo_idx_t incx, rsb_coo_idx_t incy, rsb_trans_t transA, enum rsb_op_flags_t op_flags, rsb_flags_t order,const rsb_int_t nrhs, const size_t outnri, const size_t rhsnri)
3713 {
3714 	/* */
3715 	rsb_err_t errval = RSB_ERR_NO_ERROR;
3716 
3717 	if(nrhs != 1)
3718 		errval = rsb__do_spmm(transA, alphap, mtxAp, nrhs, order, b, rhsnri, betap, c, outnri, op_flags);
3719 	else
3720 		errval = rsb__do_spmv_general(transA, alphap, mtxAp, b, incx, betap, c, incy, op_flags RSB_OUTER_NRHS_SPMV_ARGS_IDS);
3721 	return errval;
3722 }
3723 
rsb__do_transpose(struct rsb_mtx_t ** mtxApp,rsb_bool_t want_conj)3724 rsb_err_t rsb__do_transpose(struct rsb_mtx_t ** mtxApp, rsb_bool_t want_conj)
3725 {
3726 	// TODO: and what to to if data arrays are externally allocated ?
3727 	rsb_err_t errval = RSB_ERR_NO_ERROR;
3728 	struct rsb_mtx_t*tmatrix = NULL;
3729 	struct rsb_mtx_t*mtxAp = NULL;
3730 	struct rsb_coo_matrix_t coo;
3731 	struct rsb_mtx_t *fm = NULL;
3732 
3733 	if(!mtxApp || !*mtxApp)
3734 	{
3735 		errval = RSB_ERR_BADARGS;
3736 		RSB_PERR_GOTO(err, RSB_ERRM_E_MTXAP);
3737 	}
3738 	mtxAp = *mtxApp;
3739 	RSB_DO_FLAG_FLIP_UPLO(mtxAp->flags);
3740 	RSB_INIT_CXX_FROM_MTX(&coo,mtxAp);
3741 	if(rsb__allocate_coo_matrix_t(&coo)!=&coo)
3742 	{
3743 		RSB_PERR_GOTO(err,RSB_ERRM_ES);
3744 	}
3745 	errval = rsb__do_get_rows_sparse(RSB_TRANSPOSITION_N,NULL,mtxAp,coo.VA,coo.IA,coo.JA,0,mtxAp->nr-1,&coo.nnz,RSB_FLAG_NOFLAGS);
3746 	if(RSB_SOME_ERROR(errval))
3747 	{
3748 		goto err;
3749 	}
3750 	fm = rsb__do_get_first_submatrix(mtxAp);
3751 	if(!fm)
3752 		goto err; // FIXME
3753 	if(want_conj)
3754 		errval = rsb__util_do_conjugate(coo.VA,coo.typecode,coo.nnz);
3755 	if(RSB_SOME_ERROR(errval))goto err;
3756 	RSB_SWAP(rsb_coo_idx_t*,coo.IA,coo.JA);
3757 	RSB_SWAP(rsb_coo_idx_t,coo.nr,coo.nc);
3758 	RSB_COO_MEMCPY_parallel(fm->VA,fm->bpntr,fm->bindx,coo.VA,coo.IA,coo.JA,0,0,coo.nnz,fm->el_size);
3759  	RSB_DO_FLAG_DEL(mtxAp->flags,RSB_FLAG_SORTED_INPUT);
3760 	tmatrix = rsb__mtx_alloc_inner(fm->VA,fm->bpntr,fm->bindx,coo.nnz,0,0,coo.typecode,coo.nr,coo.nc,RSB_DEFAULT_ROW_BLOCKING,RSB_DEFAULT_COL_BLOCKING,mtxAp->flags,&errval);
3761  	RSB_DO_FLAG_ADD(mtxAp->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS);
3762 	rsb__destroy_inner(mtxAp);
3763 	rsb__destroy_coo_matrix_t(&coo);
3764 	*mtxApp = tmatrix;
3765 
3766 	//RSB_ERROR("FIXME: should implement tranpose functionality !");
3767 err:
3768 	RSB_DO_ERR_RETURN(errval)
3769 }
3770 
rsb__do_get_elements(const struct rsb_mtx_t * mtxAp,void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,rsb_nnz_idx_t nnz,rsb_flags_t flags)3771 rsb_err_t rsb__do_get_elements(const struct rsb_mtx_t * mtxAp, void * VA, const rsb_coo_idx_t *IA, const rsb_coo_idx_t *JA, rsb_nnz_idx_t nnz, rsb_flags_t flags)
3772 {
3773 	rsb_coo_idx_t k;
3774 	rsb_err_t errval = RSB_ERR_NO_ERROR;
3775 	rsb_nnz_idx_t ifo = ( flags & RSB_FLAG_FORTRAN_INDICES_INTERFACE )?1:0;
3776 
3777 	if(!IA || !VA || !JA || !mtxAp)
3778 	{
3779 		errval = RSB_ERR_BADARGS;
3780 		goto err;
3781 	}
3782 
3783 	for(k=0;k<nnz;++k)
3784 		errval |= rsb__do_get_coo_element(mtxAp,((rsb_char_t*)VA)+mtxAp->el_size*k,IA[k]-ifo,JA[k]-ifo);
3785 err:
3786 	RSB_DO_ERR_RETURN(errval)
3787 }
3788 
rsb__do_set_initopt_as_string(const rsb_char_t * opn,const rsb_char_t * arg)3789 rsb_err_t rsb__do_set_initopt_as_string(const rsb_char_t *opn, const rsb_char_t *arg)
3790 {
3791 	/* FIXME: document me */
3792 	return rsb__stropts_set(opn,arg);
3793 }
3794 
rsb__do_lib_get_info_str(int what,rsb_char_t * sbuf,size_t buflen)3795 rsb_err_t rsb__do_lib_get_info_str(int what, rsb_char_t* sbuf, size_t buflen)
3796 {
3797 	rsb_err_t errval = RSB_ERR_NO_ERROR;
3798 	size_t rl = buflen;
3799 
3800 	if( sbuf == NULL )
3801 	{
3802 		errval = RSB_ERR_BADARGS;
3803 		goto err;
3804 	}
3805 
3806 	sbuf[0] = RSB_NUL;
3807 #ifdef RSB_CC
3808 	snprintf(sbuf+(buflen-rl),rl,"CC=%s ",    RSB_CC    );
3809 #else /* RSB_CC */
3810 	errval |= RSB_ERR_GENERIC_ERROR;
3811 #endif /* RSB_CC */
3812 	rl -= strlen(sbuf);
3813 #ifdef RSB_CFLAGS
3814 	snprintf(sbuf+(buflen-rl),rl,"CFLAGS=%s",RSB_CFLAGS);
3815 #else /* RSB_CFLAGS */
3816 	errval |= RSB_ERR_GENERIC_ERROR;
3817 #endif /* RSB_CFLAGS */
3818 err:
3819 	return errval;
3820 }
3821 
/* Allocates an RSB matrix from user-supplied COO arrays, which are
 * copied (never taken over); on success the returned matrix owns the
 * copies.  On failure returns NULL, frees the copies and reports the
 * error through errvalp (if non-NULL). */
struct rsb_mtx_t * rsb__do_mtx_alloc_from_coo_const(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, rsb_nnz_idx_t nnzA, rsb_type_t typecode, rsb_coo_idx_t nrA, rsb_coo_idx_t ncA, rsb_blk_idx_t brA, rsb_blk_idx_t bcA, rsb_flags_t flags, rsb_err_t * errvalp)

{
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	void *VA_ = NULL;           /* private copies of the input arrays */
	rsb_coo_idx_t *IA_ = NULL, *JA_ = NULL;
	struct rsb_mtx_t * mtxAp = NULL;

	/* "externally allocated" makes no sense here: we always copy */
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
	{
		errval = /*RSB_ERR_BADARGS|*/RSB_ERR_COULD_NOT_HONOUR_EXTERNALLY_ALLOCATION_FLAGS;
		RSB_PERR_GOTO(err,RSB_ERRM_CNHEAF);
	}

	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);

	if(nnzA>0)
	{
		rsb_coo_idx_t offi = 0;

		/* Fortran (1-based) indices are normalized to 0-based during the copy */
		if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
			offi = 1, RSB_DO_FLAG_DEL(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE);
		/* copies the arrays; when nrA/ncA are 0 they are deduced from the data */
		errval = rsb__util_coo_alloc_copy_and_stats(&VA_,&IA_,&JA_,VA,IA,JA,nrA?NULL:&nrA,ncA?NULL:&ncA,nnzA,0,typecode,offi,0,RSB_FLAG_NOFLAGS,&flags);

		if(!VA_ || !IA_ || !JA_)
		{
			errval = RSB_ERR_ENOMEM;
			RSB_PERR_GOTO(err,RSB_ERRM_E_VIJ);
		}
	}
	else
	{
#if !RSB_ALLOW_EMPTY_MATRICES
		/* FIXUP CASE FOR 0-NNZ MATRICES AND IMPLICIT DIAGONAL */
		if(RSB_INVALID_NNZ_COUNT_FOR_FLAGS(nnzA,flags))
		{
			errval = RSB_ERR_BADARGS;
			RSB_PERR_GOTO(err,RSB_ERRM_CBAEM);
		}
#endif /* RSB_ALLOW_EMPTY_MATRICES */
	}
	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);

	/* on success, ownership of VA_/IA_/JA_ passes to the new matrix */
	mtxAp = rsb__mtx_alloc_inner(VA_,IA_,JA_,nnzA,0,0,typecode,nrA,ncA,brA,bcA,flags,&errval);
	if(mtxAp && errval == RSB_ERR_NO_ERROR)
		goto ok;
	/* FIXME: and if !matrix but errval ? */
err:
	/* assembly failed: release the private copies */
	RSB_CONDITIONAL_FREE(IA_);
	RSB_CONDITIONAL_FREE(JA_);
	RSB_CONDITIONAL_FREE(VA_);
ok:
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3877 
struct rsb_mtx_t * rsb__do_mtx_alloc_from_coo_inplace(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnzA, rsb_type_t typecode, rsb_coo_idx_t nrA, rsb_coo_idx_t ncA, rsb_blk_idx_t brA, rsb_blk_idx_t bcA, rsb_flags_t flags, rsb_err_t * errvalp)
{
	/* Assembles an RSB matrix directly on the caller's COO arrays
	 * (no copy: the arrays are marked as externally allocated).
	 * Fortran (1-based) index flags translate into a -1 row/column
	 * offset during assembly.  The error code is reported through
	 * errvalp when non-NULL. */
	struct rsb_mtx_t * mtxAp = NULL;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_coo_idx_t ioff = 0, joff = 0;

	RSB_ASSERT(VA || nnzA == 0 );
	RSB_ASSERT(IA || nnzA == 0 );
	RSB_ASSERT(JA || nnzA == 0 );

	if(RSB_MATRIX_UNSUPPORTED_TYPE(typecode))
	{
		errval = RSB_ERR_UNSUPPORTED_TYPE;
		RSB_PERR_GOTO(err,RSB_ERRM_EM);
	}

	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);
	RSB_DO_FLAG_ADD(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS);

	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
	{
		ioff = joff = -1;
		RSB_DO_FLAG_DEL(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE);
	}

	mtxAp = rsb__mtx_alloc_inner(VA,IA,JA,nnzA,ioff,joff,typecode,nrA,ncA,brA,bcA,flags,&errval);
err:
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3902 
struct rsb_mtx_t * rsb__do_mtx_alloc_from_csr_const(const void *VA, const rsb_coo_idx_t * RP, const rsb_coo_idx_t * JA, rsb_nnz_idx_t nnzA, rsb_type_t typecode, rsb_coo_idx_t nrA, rsb_coo_idx_t ncA, rsb_blk_idx_t brA, rsb_blk_idx_t bcA, rsb_flags_t flags, rsb_err_t * errvalp)
{
	/* Allocates an RSB matrix from user-supplied CSR arrays, which are
	 * copied (never taken over).  The row pointer array RP is expanded
	 * into explicit row indices in the copy.
	 * Fixes vs. previous revision: the temporary copies are freed on
	 * error (they used to leak), and assembly errors are now routed
	 * through the local errval so RSB_CONDITIONAL_ERRPSET no longer
	 * clobbers them with RSB_ERR_NO_ERROR. */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	void *VA_ = NULL;
	rsb_coo_idx_t *IA_ = NULL,*JA_ = NULL;
	struct rsb_mtx_t * mtxAp = NULL;
	/* copies are sized to hold max(nnz, nr+1) entries, as needed by the
	 * row pointer expansion below */
	const size_t cnnz = RSB_MAX(nnzA,nrA+1), cis = sizeof(rsb_coo_idx_t), nvs = RSB_SIZEOF(typecode);
	rsb_coo_idx_t roff = 0,coff = 0;

	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);

	/* "externally allocated" makes no sense here: we always copy */
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
	{
		errval = RSB_ERR_COULD_NOT_HONOUR_EXTERNALLY_ALLOCATION_FLAGS;
		RSB_PERR_GOTO(err,RSB_ERRM_BFEANS);
	}

	IA_ = rsb__clone_area_with_extra(RP,cis*(nrA+1),0,cis*(cnnz-(nrA+1)));
	JA_ = rsb__clone_area_with_extra(JA,cis*(nnzA ),0,cis*(cnnz-nnzA));
	VA_ = rsb__clone_area_with_extra(VA,nvs*(nnzA ),0,nvs*(cnnz-nnzA));

	if(!VA_ || !IA_ || !JA_)
	{
		errval = RSB_ERR_ENOMEM;
		RSB_PERR_GOTO(err,RSB_ERRM_E_VIJ);
	}

	/* expand the row pointers into per-nonzero row indices (in IA_) */
	errval = rsb__util_uncompress_row_pointers_array((rsb_coo_idx_t*)RP,nrA,flags,RSB_FLAG_C_INDICES_INTERFACE,IA_);
	if(RSB_SOME_ERROR(errval))
	{
		RSB_PERR_GOTO(err,RSB_ERRM_EM);
	}

	/* Fortran (1-based) column indices become a -1 column offset */
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
		coff = -1, RSB_DO_FLAG_DEL(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE);

	RSB_DEBUG_ASSERT(roff>=-1 && coff>=-1); /* for Fortran */
	RSB_DO_FLAG_ADD(flags,RSB_FLAG_SORTED_INPUT); /* CSR input is row-sorted by construction */
	/* on success, ownership of VA_/IA_/JA_ passes to the new matrix */
	mtxAp = rsb__mtx_alloc_inner(VA_,IA_,JA_,nnzA,roff,coff,typecode,nrA,ncA,brA,bcA,flags,&errval);
	if(mtxAp && errval == RSB_ERR_NO_ERROR)
		goto ok;
err:
	/* assembly failed: release the private copies (was leaking) */
	RSB_CONDITIONAL_FREE(IA_);
	RSB_CONDITIONAL_FREE(JA_);
	RSB_CONDITIONAL_FREE(VA_);
ok:
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3945 
struct rsb_mtx_t * rsb__do_mtx_alloc_from_csc_const(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * CP, rsb_nnz_idx_t nnzA, rsb_type_t typecode, rsb_coo_idx_t nrA, rsb_coo_idx_t ncA, rsb_blk_idx_t brA, rsb_blk_idx_t bcA, rsb_flags_t flags, rsb_err_t * errvalp)
{
	/* Allocates an RSB matrix from user-supplied CSC arrays, which are
	 * copied (never taken over): the CSC input is first converted to
	 * CSR into freshly allocated arrays, then the matrix is assembled
	 * in place on those arrays (which the matrix then owns).
	 * Fixes vs. previous revision: the temporary arrays are freed on
	 * error (they used to leak), assembly errors are routed through the
	 * local errval instead of being clobbered by the final
	 * RSB_CONDITIONAL_ERRPSET, and the unused timing code was removed. */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	void *VA_ = NULL;
	rsb_coo_idx_t *IA_ = NULL,*JA_ = NULL;
	struct rsb_mtx_t * mtxAp = NULL;
	const rsb_nnz_idx_t maxdim = RSB_MAX(nnzA,RSB_MAX(nrA+1,ncA+1));

	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);

	/* "externally allocated" makes no sense here: we always copy */
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
	{
		errval = RSB_ERR_COULD_NOT_HONOUR_EXTERNALLY_ALLOCATION_FLAGS;
		RSB_PERR_GOTO(err,RSB_ERRM_EM);
	}

	if(nnzA>0)
	{
		rsb_coo_idx_t offi = 0;

		if(RSB_SOME_ERROR(errval = rsb__util_coo_alloc(&VA_,&IA_,&JA_,maxdim,typecode,RSB_BOOL_FALSE)))
		{
			RSB_PERR_GOTO(err,RSB_ERRM_EM);
		}
		if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
			offi = 1, RSB_DO_FLAG_DEL(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE);
		/* FIXME: assembly shall give the user chance to pass offo and offi */
		rsb_util_csc2csr(VA,IA,CP,VA_,IA_,JA_,nrA,ncA,nnzA,typecode,offi,0,&flags);
	}
	if(RSB_SOME_ERROR(errval))
	{
		RSB_PERR_GOTO(err,RSB_ERRM_EM);
	}
	RSB_DO_FLAG_ADD(flags,RSB_FLAG_SORTED_INPUT); /* CSR conversion output is sorted */
	/* note: &errval (not errvalp) so a failure here survives to the ERRPSET below */
	mtxAp = rsb_mtx_alloc_from_csr_inplace (VA_,IA_,JA_,nnzA,typecode,nrA,ncA,brA,bcA,flags,&errval);
	if(mtxAp)
	{
		/* the matrix now owns (and will free) the converted arrays */
		RSB_DO_FLAG_DEL(mtxAp->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS);
		goto ok;
	}
err:
	/* assembly failed or never reached: release the temporaries (was leaking) */
	RSB_CONDITIONAL_FREE(IA_);
	RSB_CONDITIONAL_FREE(JA_);
	RSB_CONDITIONAL_FREE(VA_);
ok:
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3994 
3995 
3996 /* @endcond */
3997