1 /*
2
3 Copyright (C) 2008-2021 Michele Martone
4
5 This file is part of librsb.
6
7 librsb is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 librsb is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with librsb; see the file COPYING.
19 If not, see <http://www.gnu.org/licenses/>.
20
21 */
22 /* @cond INNERDOC */
23 /**
24 * @file
25 * @author Michele Martone
26 * @brief
27 *
28 * Low level routines and tools for our sparse matrix formats implementations.
29 * \internal
30 *
31 * */
32 #include "rsb_common.h"
33 #include "rsb_util.h"
34 #include "rsb.h"
35 #include "rsb_types.h"
36 #include "rsb_unroll.h"
37 #ifdef RSB_HAVE_SYS_UTSNAME_H
38 #include <sys/utsname.h> /* uname */
39 #endif /* RSB_HAVE_SYS_UTSNAME_H */
40
41 #define RSB_WANT_NULL_ALLOCATED_ZERO_NNZ_COO_MATRICES_ARRAYS 1 /* 20110419 a bugfix for the nnz == 0 case vs realloc and memory counters */
42 #define RSB_TOKLEN 16
43 #define RSB_WANT_ZERO_ON_DESTROY 0 /* a useful debug option */
44 #define rsb__strcpy strcpy
45
46 #define RSB_ILLEGAL_FLAGS 0xFFFFFFFF
47 #define RSB_WANT_DOUBLE_MATRIX_FREE_DETECT 0
48
49 #if RSB_WANT_DOUBLE_MATRIX_FREE_DETECT
50 #define RSB_CONDITIONAL_FREE_MTXAP(MTXAP) if(MTXAP){ if( (MTXAP)->flags == RSB_ILLEGAL_FLAGS ) { RSB_ERROR("Probable attempt to free already freed matrix at %p !\n",(MTXAP)); } (MTXAP)->flags = RSB_ILLEGAL_FLAGS; RSB_CONDITIONAL_FREE(MTXAP); }
51 #else
52 #define RSB_CONDITIONAL_FREE_MTXAP(MTXAP) RSB_CONDITIONAL_FREE(MTXAP)
53 #endif
54
55 RSB_INTERNALS_COMMON_HEAD_DECLS
56
rsb__init_options_t(struct rsb_options_t * o)57 void * rsb__init_options_t(struct rsb_options_t *o)
58 {
59 /*!
60 * \ingroup gr_internals
61 * initializes a rsb_options_t struct to default 'vanilla' values
62 * \return the input address
63 */
64 if(!o)
65 goto err;
66 RSB_BZERO_P(o);
67 err:
68 return o;
69 }
70
rsb__destroy_options_t(struct rsb_options_t * o)71 void * rsb__destroy_options_t(struct rsb_options_t *o)
72 {
73 /*!
74 * \ingroup gr_internals
75 * frees the given options structure and any of its allocated arrays
76 *
77 * \return the input address
78 */
79 if(!o)
80 goto err;
81 RSB_CONDITIONAL_FREE(o->bitmap);
82 RSB_CONDITIONAL_FREE(o);
83 err:
84 return o;
85 }
86
const void * rsb__is_valid_options_t(const struct rsb_options_t *o, rsb_coo_idx_t m, rsb_coo_idx_t k)
{
	/*!
	 * \ingroup gr_internals
	 * Validates the structure members which matter for matrix creation,
	 * together with the given dimensions.
	 *
	 * \return the input address in case of success, NULL otherwise
	 * */
	if(o == NULL)
		return NULL;
	if(RSB_INVALID_COO_INDEX(m))
		return NULL;
	if(RSB_INVALID_COO_INDEX(k))
		return NULL;
	/*
	 * Further checks (element size, bitmap presence, ...) could be
	 * added here once the structure is known to be fully initialized.
	 */
	return o;
}
114
void * rsb__reallocate_coo_matrix_t(struct rsb_coo_matrix_t *cmp, rsb_nnz_idx_t nnnz)
{
	/*!
	 * \ingroup gr_internals
	 * Resizes the IA/JA/VA arrays of *cmp to hold nnnz elements and
	 * updates cmp->nnz accordingly. On a failed reallocation the
	 * original arrays are left untouched (caller still owns them).
	 * \return On success, return the input address; on failure, NULL.
	 */
	size_t elsz;
	void *np = NULL;

	if(cmp == NULL)
		return NULL;

	elsz = RSB_NUMERICAL_TYPE_SIZE(cmp->typecode);
	if(elsz < 1)
		return NULL;

	if( RSB_WANT_NULL_ALLOCATED_ZERO_NNZ_COO_MATRICES_ARRAYS && nnnz == 0 )
	{
		/* nnz == 0: keep no arrays at all (see 20110419 bugfix note above
		 * about realloc and memory counters) */
		cmp->IA = NULL;
		cmp->JA = NULL;
		cmp->VA = NULL;
		cmp->nnz = nnnz;
		return cmp;
	}

	np = rsb__realloc(cmp->IA,sizeof(rsb_coo_idx_t)*(nnnz));
	if(np == NULL)
		return NULL;
	cmp->IA = np;

	np = rsb__realloc(cmp->JA,sizeof(rsb_coo_idx_t)*(nnnz));
	if(np == NULL)
		return NULL;
	cmp->JA = np;

	np = rsb__realloc(cmp->VA,elsz*nnnz);
	if(np == NULL)
		return NULL;
	cmp->VA = np;

	if(cmp->IA && cmp->JA && cmp->VA)
	{
		cmp->nnz = nnnz;
		return cmp;
	}

	/* critical error (should not happen): release everything */
	RSB_CONDITIONAL_FREE(cmp->IA);
	RSB_CONDITIONAL_FREE(cmp->JA);
	RSB_CONDITIONAL_FREE(cmp->VA);
	return NULL;
}
171
void * rsb__callocate_coo_matrix_t(struct rsb_coo_matrix_t *cmp)
{
	/*!
	 * \ingroup gr_internals
	 * Zero-initializing variant of rsb__allocate_coo_matrix_t:
	 * allocates cmp's IA/JA/VA arrays via the calloc path.
	 * \return the input address on success, NULL on error
	 * */
	return rsb__xallocate_coo_matrix_t(cmp,RSB_BOOL_TRUE,RSB_FLAG_NOFLAGS);
}
176
void * rsb__allocate_coo_matrix_t(struct rsb_coo_matrix_t *cmp)
{
	/*!
	 * \ingroup gr_internals
	 * Allocates cmp's IA/JA/VA arrays (uninitialized, malloc path).
	 * \return the input address on success, NULL on error
	 * */
	return rsb__xallocate_coo_matrix_t(cmp,RSB_BOOL_FALSE,RSB_FLAG_NOFLAGS);
}
181
void * rsb__xallocate_coo_matrix_t(struct rsb_coo_matrix_t *cmp, rsb_bool_t want_calloc, rsb_flags_t flags)
{
	/*!
	 * \ingroup gr_internals
	 * Allocates the IA/JA/VA arrays of *cmp, whose nnz and typecode
	 * members must already be set. With RSB_FLAG_DEFAULT_CSR_MATRIX_FLAGS
	 * the row index array is sized max(nnz,nr+1), so it can later be
	 * reused as a CSR row pointer array.
	 * \param want_calloc if RSB_BOOL_TRUE, arrays are zero-initialized
	 * \return the input address on success, NULL on error
	 */
	size_t es = 0, nnz = 0, rnz = 0;

	if(!cmp)
	{
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}
	rnz = nnz = cmp->nnz;
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_DEFAULT_CSR_MATRIX_FLAGS))
		rnz = RSB_MAX(nnz,cmp->nr+1);

	es = RSB_NUMERICAL_TYPE_SIZE(cmp->typecode);

	if(es<1)
	{
		RSB_ERROR("typecode seem wrong: 0%x\n",cmp->typecode);
		RSB_PERR_GOTO(err,RSB_ERRM_WTC);
	}
	/* blank the pointers first, so the error path below may free safely */
	cmp->IA = NULL;
	cmp->JA = NULL;
	cmp->VA = NULL;

	/* braced branches replace the former comma-expression chains
	 * (a = x, b = y, c = z;), which were fragile under edits */
	if(want_calloc == RSB_BOOL_TRUE)
	{
		cmp->IA = rsb__calloc(sizeof(rsb_coo_idx_t)*(rnz));
		cmp->JA = rsb__calloc(sizeof(rsb_coo_idx_t)*(nnz));
		cmp->VA = rsb__calloc_vector(nnz, cmp->typecode);
	}
	else
	{
		cmp->IA = rsb__malloc(sizeof(rsb_coo_idx_t)*(rnz));
		cmp->JA = rsb__malloc(sizeof(rsb_coo_idx_t)*(nnz));
		cmp->VA = rsb__malloc_vector(nnz, cmp->typecode);
	}

	if(!cmp->IA || !cmp->JA || !cmp->VA)
	{
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}
	/*
	 * Note: we do not free cmp itself (caller owns the struct).
	 */
	return cmp;
err:
	if(cmp)
	{
		RSB_CONDITIONAL_FREE(cmp->IA);
		RSB_CONDITIONAL_FREE(cmp->JA);
		RSB_CONDITIONAL_FREE(cmp->VA);
	}
	return NULL;
}
236
rsb__destroy_coo_matrix_t(struct rsb_coo_matrix_t * cmp)237 void * rsb__destroy_coo_matrix_t(struct rsb_coo_matrix_t *cmp)
238 {
239 /*!
240 * \ingroup gr_internals
241 * frees the given structure allocated arrays
242 *
243 * \return the input address
244 */
245 if(!cmp)
246 return cmp;
247 RSB_CONDITIONAL_FREE(cmp->IA);
248 RSB_CONDITIONAL_FREE(cmp->JA);
249 RSB_CONDITIONAL_FREE(cmp->VA);
250 /* RSB_BZERO_P(cmp); */
251 /*
252 * Note: we do not free cmp itself.
253 */
254 return cmp;
255 }
256
rsb__transpose_coo_matrix_t(struct rsb_coo_matrix_t * cmp)257 void * rsb__transpose_coo_matrix_t(struct rsb_coo_matrix_t *cmp)
258 {
259 /*!
260 * \ingroup gr_internals
261 * transposes symbolically the given matrix
262 *
263 * \return the input address
264 */
265 if(!cmp)
266 return cmp;
267 RSB_SWAP(rsb_coo_idx_t ,cmp->nr, cmp->nc );
268 RSB_SWAP(rsb_coo_idx_t*,cmp->IA,cmp->JA);
269 return cmp;
270 }
271
rsb__init_blank_pointers(struct rsb_mtx_t * mtxAp)272 void * rsb__init_blank_pointers(struct rsb_mtx_t *mtxAp)
273 {
274 /*!
275 * \ingroup gr_internals
276 * \return the input address
277 * */
278 if(!mtxAp)
279 return mtxAp;
280
281 mtxAp->VA = NULL;
282 mtxAp->indptr = NULL;
283 mtxAp->bindx = NULL;
284 mtxAp->rpntr = NULL;
285 mtxAp->cpntr = NULL;
286 mtxAp->bpntr = NULL;
287 #if RSB_WANT_BITMAP
288 mtxAp->options = NULL; /* exactly ... */
289 #endif /* RSB_WANT_BITMAP */
290 mtxAp->mpntr = NULL;
291 mtxAp->Mpntr = NULL;
292 mtxAp->all_leaf_matrices = NULL;
293 mtxAp->sm[0] = NULL;
294 mtxAp->sm[1] = NULL;
295 mtxAp->sm[2] = NULL;
296 mtxAp->sm[3] = NULL;
297
298 return mtxAp;
299 }
300
rsb__fill_struct(struct rsb_mtx_t * mtxAp,void * VA,rsb_coo_idx_t * IA,rsb_coo_idx_t * JA,rsb_coo_idx_t m,rsb_coo_idx_t k,rsb_type_t typecode,rsb_flags_t flags)301 rsb_err_t rsb__fill_struct(struct rsb_mtx_t *mtxAp, void * VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_type_t typecode, rsb_flags_t flags)
302 {
303 /*!
304 * \ingroup gr_internals
305 * initializes a rsb_mtx_t struct to default 'vanilla' values
306 * \return the input address
307 * FIXME: rsb__fill_struct -> rsb__mtx_init
308 */
309
310 if(!mtxAp)
311 return RSB_ERR_GENERIC_ERROR;
312
313 rsb__init_struct(mtxAp);/* redundant ?*/
314 mtxAp->VA = VA;
315 mtxAp->bindx = JA;
316 mtxAp->bpntr = IA;
317 mtxAp->nr = m;
318 mtxAp->nc = k;
319 mtxAp->flags = flags;
320 mtxAp->typecode = typecode;
321
322 return RSB_ERR_NO_ERROR;
323 }
324
void * rsb__fill_coo_struct(struct rsb_coo_matrix_t *mtxAp, void * VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_nnz_idx_t nnz, rsb_type_t typecode)
{
	/*!
	 * \ingroup gr_internals
	 * Installs the given arrays, dimensions, nonzero count and typecode
	 * into a COO matrix structure (no allocation, no validation).
	 * \return the input address, or NULL for NULL input
	 * */
	if(mtxAp == NULL)
		return NULL;

	mtxAp->typecode = typecode;
	mtxAp->nc = k;
	mtxAp->nr = m;
	mtxAp->nnz = nnz;
	mtxAp->VA = VA;
	mtxAp->JA = JA;
	mtxAp->IA = IA;

	return mtxAp;
}
338
rsb__init_struct(struct rsb_mtx_t * mtxAp)339 void * rsb__init_struct(struct rsb_mtx_t *mtxAp)
340 {
341 /*!
342 * \ingroup gr_internals
343 * initializes a rsb_mtx_t struct to default 'vanilla' values
344 * \return the input address
345 * */
346 if(!mtxAp)
347 return mtxAp;
348
349 rsb__init_blank_pointers(mtxAp);
350
351 mtxAp->flags = RSB_FLAG_NOFLAGS ;
352
353 mtxAp->sat = RSB_TIME_ZERO;
354 mtxAp->eit = RSB_TIME_ZERO;
355 mtxAp->pet = RSB_TIME_ZERO;
356 mtxAp->est = RSB_TIME_ZERO;
357 mtxAp->tat = RSB_TIME_ZERO;
358 mtxAp->cpt = RSB_TIME_ZERO;
359 mtxAp->rpt = RSB_TIME_ZERO;
360
361 return mtxAp;
362 }
363
void * rsb__destroy_inner(struct rsb_mtx_t *mtxAp)
{
	/*!
	 * \ingroup gr_internals
	 * \param mtxAp is a pointer to a valid matrix structure.
	 * \return always NULL (despite the void* signature).
	 *
	 * Deallocates the guts of a sparse matrix.
	 * (will leave the struct in an inconsistent state)
	 *
	 * Two ownership regimes are handled:
	 * - matrices assembled in COO arrays (RSB_FLAG_ASSEMBLED_IN_COO_ARRAYS):
	 *   the whole recursive structure lives in three shared arrays, which
	 *   are freed once from the root; the struct itself is freed here too.
	 * - ordinary matrices: submatrices are destroyed recursively, then the
	 *   per-matrix arrays are freed according to ownership flags.
	 */
	rsb_submatrix_idx_t i,j;
	struct rsb_mtx_t * submatrix = NULL;

	//RSB_STDOUT("destroying matrix %p\n",mtxAp);

	if(!mtxAp)
		goto ret;

	if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_ASSEMBLED_IN_COO_ARRAYS))
	{
		if(rsb__is_root_matrix(mtxAp))
		{
			/* FIXME: unfinished, temporary */
			/* this is a trick: fitting the whole recursive matrix in three arrays */
			void *IA = NULL,*JA = NULL,*VA = NULL;
			rsb_bool_t is_bio = rsb__do_is_matrix_binary_loaded(mtxAp); // binary I/O matrix
			struct rsb_mtx_t *fsm = rsb__do_get_first_submatrix(mtxAp);
			rsb_flags_t flags = mtxAp->flags;

			/* grab the shared arrays from the first (leaf) submatrix
			 * before freeing the struct they hang off */
			if(!is_bio)
				RSB_CONDITIONAL_FREE(mtxAp->all_leaf_matrices) // ?!
			JA = fsm->bindx;
			VA = fsm->VA;
			if(!is_bio)
			{
				//IA = fsm->bpntr-(fsm->nr+1);
				IA = fsm->bpntr;
			}
			else
				IA = mtxAp; /* binary-loaded: the struct itself heads the mapped area */
			if(!is_bio)
				RSB_CONDITIONAL_FREE_MTXAP(mtxAp)// extra allocation
			// RSB_INFO("VA:%p, IA:%p, JA:%p\n",VA,IA,JA);
			if(!RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
			{
				RSB_CONDITIONAL_FREE(IA);/* these arrays are allowed to be NULL, as it happens during conversions */
				RSB_CONDITIONAL_FREE(JA);
				RSB_CONDITIONAL_FREE(VA);
			}
		}
		return NULL;/* no deallocation, in this case */
	}

	/* recursively destroy submatrices first */
	if(rsb__is_recursive_matrix(mtxAp->flags))
		RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
		{
			if(submatrix)
			{
				rsb__do_mtx_free(submatrix);
			}
		}

	/* the per-matrix arrays are only freed when owned by this struct */
	if(!(mtxAp->flags & RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR))
	if(!RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
	{
		RSB_CONDITIONAL_FREE(mtxAp->VA);
		RSB_CONDITIONAL_FREE(mtxAp->bindx);
		RSB_CONDITIONAL_FREE(mtxAp->bpntr);
	}

	RSB_CONDITIONAL_FREE(mtxAp->indptr);

#if RSB_WANT_BITMAP
	if(mtxAp->options)
		rsb__destroy_options_t(mtxAp->options);
#endif /* RSB_WANT_BITMAP */

	if((mtxAp->flags & RSB_FLAG_OWN_PARTITIONING_ARRAYS)!=0)
	{
		RSB_CONDITIONAL_FREE(mtxAp->rpntr);
		RSB_CONDITIONAL_FREE(mtxAp->cpntr);
	}
#if RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS
	RSB_CONDITIONAL_FREE(mtxAp->all_leaf_matrices);
#endif /* RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS */
	RSB_BZERO_P(mtxAp);/* this enforces correct usage */
ret:
	return NULL;
}
453
rsb__do_get_first_submatrix(const struct rsb_mtx_t * mtxAp)454 struct rsb_mtx_t * rsb__do_get_first_submatrix(const struct rsb_mtx_t *mtxAp)
455 {
456 /*!
457 * \ingroup gr_internals
458 * */
459 if(!mtxAp)
460 return NULL;
461
462 if(rsb__is_recursive_matrix(mtxAp->flags))
463 {
464 rsb_submatrix_idx_t i,j;
465 struct rsb_mtx_t * submatrix;
466 RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
467 if(submatrix)
468 return rsb__do_get_first_submatrix(submatrix);
469 }
470 return (struct rsb_mtx_t*)mtxAp;/* FIXME */
471 }
472
void * rsb__do_mtx_free(struct rsb_mtx_t *mtxAp)
{
	/*!
	 * \ingroup gr_internals
	 * \param mtxAp a pointer to a matrix structure
	 *
	 * Will destroy a valid matrix and deallocate all of its allocated data.
	 * Handles the sparse-BLAS-handle-backed case (RSB_MTX_HBDF) separately;
	 * for matrices assembled in COO arrays the struct itself is released
	 * inside rsb__destroy_inner rather than here.
	 * \return the value of mtxAp after destruction (do not dereference).
	 * */
	rsb_flags_t flags;

	if(!mtxAp)
		goto ret;

	if( RSB_MTX_HBDF( mtxAp ) )
	{
		/* matrix is backed by a sparse BLAS handle: destroy via BLAS layer */
		blas_sparse_matrix bmtxA = RSB_MTX_HBDFH(mtxAp);
		rsb__BLAS_Xusds(bmtxA);
		RSB_CONDITIONAL_FREE_MTXAP(mtxAp);
		goto ret;
	}

	/* save flags: rsb__destroy_inner may zero or free *mtxAp */
	flags = mtxAp->flags;

	rsb__destroy_inner(mtxAp);

	/* in the COO-assembled case, rsb__destroy_inner already freed the struct */
	if(!RSB_DO_FLAG_HAS(flags,RSB_FLAG_ASSEMBLED_IN_COO_ARRAYS))
	{
		if(mtxAp && RSB_WANT_ZERO_ON_DESTROY)
		{
			RSB_BZERO_P(mtxAp);
		}
		RSB_CONDITIONAL_FREE_MTXAP(mtxAp);
	}
ret:
	return mtxAp;
}
509
510
511 #if RSB_WANT_BITMAP
static size_t rsb__get_sizeof_options(const struct rsb_options_t *o, rsb_blk_idx_t M_b, rsb_blk_idx_t K_b)
{
	/*!
	 * \ingroup gr_internals
	 * \param o a pointer to a valid rsb_options_t structure
	 * \param M_b,K_b the bitmap dimensions used at allocation time
	 *
	 * \return the deep memory footprint of *o (struct plus bitmap,
	 *         if present), or 0 for NULL input
	 * */
	size_t bytes;

	if(o == NULL)
		return 0;

	bytes = sizeof(*o);

	if(o->bitmap != NULL)
		bytes += RSB_BYTES_PER_BITMAP(M_b,K_b);

	return bytes;
}
534 #endif /* RSB_WANT_BITMAP */
535
size_t rsb__get_sizeof(const struct rsb_mtx_t *mtxAp )
{
	/*!
	 * \ingroup gr_internals
	 * \param mtxAp a pointer to a valid rsb_mtx_t structure
	 * \return the amount of memory allocated for this structure, deeply (indices + coefficients).
	 *
	 * Accounts for the struct itself, the options (bitmap builds), the
	 * partitioning arrays, the index/value arrays of terminal matrices,
	 * and recurses over submatrices. Returns 0 for NULL input.
	 * */
	size_t count = 0;
	struct rsb_mtx_t * submatrix = NULL;
	rsb_submatrix_idx_t i,j;
	rsb_bool_t istrec = RSB_BOOL_FALSE;

	if(!mtxAp )
		goto err;

	istrec = rsb__is_terminal_recursive_matrix(mtxAp);

	count += sizeof(*mtxAp);

#if RSB_WANT_BITMAP
	if(mtxAp->options)
		count += rsb__get_sizeof_options(mtxAp->options,mtxAp->M_b,mtxAp->K_b);
	else
		/* NOTE(review): this early return skips the partitioning-array and
		 * submatrix accounting below when options is NULL — confirm intended. */
		return count;
#endif /* RSB_WANT_BITMAP */
	/* we allocate a new options structure */
	if( mtxAp->rpntr ) count += sizeof(rsb_coo_idx_t)*(mtxAp->M_b+1);
	if( mtxAp->cpntr ) count += sizeof(rsb_coo_idx_t)*(mtxAp->K_b+1);
	if(istrec)
	{
#if RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT
		if(rsb__is_coo_matrix(mtxAp))
		{
			/* COO: two index arrays sized nnz, half- or full-word */
			if( mtxAp->flags & RSB_FLAG_USE_HALFWORD_INDICES)
				count += sizeof(rsb_half_idx_t)*(mtxAp->nnz)*2;
			else
				count += sizeof(rsb_coo_idx_t)*(mtxAp->nnz)*2;
		}
		else
		{
			/* CSR-like: one column index array sized nnz plus nr+1 row pointers */
			if( mtxAp->flags & RSB_FLAG_USE_HALFWORD_INDICES)
				count += sizeof(rsb_half_idx_t)*(mtxAp->nnz)+sizeof(rsb_nnz_idx_t)*(mtxAp->nr+1);
			else
				count += sizeof(rsb_coo_idx_t)*(mtxAp->nnz)+sizeof(rsb_nnz_idx_t)*(mtxAp->nr+1);
		}
		if( mtxAp->VA ) count += mtxAp->el_size*mtxAp->nnz;
		/* FIXME: missing the amount of memory allocated as extra submatrices for root, and the redundant structs array */
#else /* RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT */
		if( mtxAp->bindx ) count += sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1);
		if( mtxAp->indptr ) count += sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1);
		if( mtxAp->bpntr ) count += sizeof(rsb_nnz_idx_t)*(mtxAp->Mdim+1);
#if RSB_WANT_BITMAP
		if( mtxAp->VA ) count += ( RSB_TOTAL_BLOCK_BYTES(mtxAp,mtxAp->options) );
#endif /* RSB_WANT_BITMAP */
#endif /* RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT */
	}
	/* recurse into any submatrices */
	RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
		if(submatrix)
			count += rsb__get_sizeof(submatrix);

err:
	return count;
}
599
rsb__nnz_coord_compar(const void * key,const void * am)600 int rsb__nnz_coord_compar(const void *key, const void *am)
601 {
602 /*!
603 * \ingroup gr_internals
604 * A service function. NOTE:
605 *
606 * Please note that the sole use of this function is the major bottleneck during matrix creation.
607 * When thinking about optimizing matrix creation, come back here: this routine eats up to 90%
608 * of the time required for matrix creation.
609 * */
610 register const rsb_coo_idx_t *iam = am,*ik = key;
611 /*!
612 * this function is used as compar in for stdlib's bsearch
613 * on the ordered arrays p_r and p_c, and will return :
614 * -1, 0, or 1
615 * respectively if the key element is :
616 * less than both, in between, or greater than both *am and am[1].
617 * */
618 return (*ik < iam[1] )? (*ik<*iam?-1:0):1;
619 }
620
rsb__allocate_bitmap(rsb_blk_idx_t rows,rsb_blk_idx_t cols)621 rsb_bitmap_data_t * rsb__allocate_bitmap(rsb_blk_idx_t rows, rsb_blk_idx_t cols)
622 {
623 /*!
624 * \ingroup gr_internals
625 * \param rows the amount of rows the bitmap should have
626 * \param rows the amount of columns the bitmap should have
627 * \return the bitmap area
628 *
629 * Allocates an area of (((cols+sizeof(rsb_bitmap_data_t)-1))/sizeof(rsb_bitmap_data_t) * rows) bytes
630 * to use as a bitmap, through the RSB_BITMAP_SET
631 * and RSB_BITMAP_GET macros.
632 *
633 * This bitmap takes ( ceil(cols/sizeof(rsb_bitmap_data_t))*sizeof(rsb_bitmap_data_t)*rows ) bytes of memory.
634 * it should be roughly 1 bit for block of data
635 * or at worst
636 * ((cols/(sizeof(rsb_bitmap_data_t)*8))+1/(sizeof(rsb_bitmap_data_t)*8))/cols bits for every data block.
637 *
638 * The bitmap is return set to zero.
639 * assumes sizeof(rsb_bitmap_data_t)>1
640 * */
641 if( RSB_INVALID_COO_INDEX(rows) || RSB_INVALID_COO_INDEX(cols))
642 return NULL;
643 return rsb__calloc(RSB_BYTES_PER_BITMAP(rows,cols));
644 }
645
rsb__allocate_bitvector(rsb_blk_idx_t nbits)646 rsb_bitmap_data_t * rsb__allocate_bitvector(rsb_blk_idx_t nbits)
647 {
648 /*
649 * \ingroup gr_internals
650 * Allocates an array for \c nbits bits, set to zero.
651 * */
652 #ifdef RSB_BITMAP_ROW_MAJOR_ORDER
653 return rsb__allocate_bitmap(nbits,1);
654 #else /* RSB_BITMAP_ROW_MAJOR_ORDER */
655 return rsb__allocate_bitmap(1,nbits);
656 #endif /* RSB_BITMAP_ROW_MAJOR_ORDER */
657 }
658
rsb__bitmap_bit_count(const rsb_bitmap_data_t * bitmap,const rsb_blk_idx_t rows,const rsb_blk_idx_t cols)659 rsb_blk_idx_t rsb__bitmap_bit_count(const rsb_bitmap_data_t *bitmap, const rsb_blk_idx_t rows, const rsb_blk_idx_t cols)
660 {
661 /*!
662 * \ingroup gr_internals
663 * \param bitmap is the bitmap data pointer
664 * \param rows is the number of rows the bitmap was allocated for
665 * \param cols is the number of columns the bitmap was allocated for
666 * \return -1 in case of error, the set bit count otherwise
667 *
668 * This function counts the bits in a bitmap
669 * Note that it is not dependent on the internal bitmap storage scheme,
670 * (column major or row major), but only as long as the bit pool is uniform.
671 *
672 * TODO : this is not core functionality, so it could be moved somewhere else.
673 * */
674 register rsb_blk_idx_t i,bc = 0;
675 register rsb_bitmap_data_t w;
676 if(!bitmap)
677 return RSB_ERR_BADARGS;
678 for(i=0;i<((rsb_blk_idx_t)RSB_BYTES_PER_BITMAP(rows,cols)/sizeof(rsb_bitmap_data_t));++i)
679 {
680 w = bitmap[i];
681 /* TODO : no stdlib functions for counting bits in integers ? */
682 //b += (w&1); while(w) b += ((w /= 2)&1);
683 while(w!=0) {bc += (w&1);w /= 2;}
684 }
685 /* warning : overflow check missing */
686 return bc;
687 }
688
void* rsb__get_block_address( rsb_blk_idx_t blockrow, rsb_blk_idx_t blockcolumn, const struct rsb_mtx_t *mtxAp)
{
	/*!
	 * \ingroup gr_internals
	 * \param blockrow the block row
	 * \param blockcolumn the block column
	 * \param mtxAp a valid matrix structure pointer
	 * \return a pointer to the block itself or NULL if it is not present
	 *
	 * A service function for getting the (blockrow,blockcolumn) block address inside the matrix.
	 *
	 * This function is SLOW, and should be used for debugging purposes only !
	 * ( it uses indirect indexing to catch elements )
	 * */
	rsb_nnz_idx_t l = 0;      /* linear scan offset within the block row/column */
	rsb_nnz_idx_t fnze = 0;   /* first nonzero entry of the block row/column in bindx */
#if RSB_WANT_BITMAP
	struct rsb_options_t *o = NULL;
#endif /* RSB_WANT_BITMAP */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_nnz_idx_t offset = 0;

	if(!mtxAp)
		{errval = RSB_ERR_BADARGS;goto err;}

#if RSB_WANT_BITMAP
	o = mtxAp->options;
	if(!o)
		{errval = RSB_ERR_BADARGS;goto err;}
#endif /* RSB_WANT_BITMAP */

	/* guard index arithmetic below against overflow */
	if(RSB_BLK_ADD_OVERFLOW(blockrow,RSB_INDEX_OF_SAFE_EXTRA))
		{errval = RSB_ERR_LIMITS;goto err;}

	if(RSB_BLK_ADD_OVERFLOW(blockcolumn,RSB_INDEX_OF_SAFE_EXTRA))
		{errval = RSB_ERR_LIMITS;goto err;}

	/* i is the working block row */
	if( mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER )
	{
		if(mtxAp->bpntr[blockcolumn]==mtxAp->bpntr[blockcolumn+1])
			goto err;/* empty block column */
		fnze = mtxAp->bpntr[blockcolumn]; /* first nonzero entry in bindx */
		/* NOTE(review): unbounded scan — assumes the block exists in this
		 * block column; otherwise it reads past the segment. Verify callers. */
		while(mtxAp->bindx[fnze+l]!=blockrow)++l;
	}
	else
	{
		if(mtxAp->bpntr[blockrow]==mtxAp->bpntr[blockrow+1])
			goto err;/* empty block row */
		fnze = mtxAp->bpntr[blockrow]; /* first nonzero entry in bindx */
		/* NOTE(review): same unbounded-scan assumption as above. */
		while(mtxAp->bindx[fnze+l]!=blockcolumn)++l;
	}

	if(RSB_NNZ_ADD_OVERFLOW(fnze,l))
		{errval = RSB_ERR_LIMITS;goto err;}

	offset = fnze+l;
	//return ((rsb_byte_t*)(mtxAp->VA)) + mtxAp->indptr[offset] * mtxAp->el_size;
	return RSB_BLOCK_ADDRESS(mtxAp,offset);
err:
	/* NOTE(review): the empty row/column paths reach here with
	 * errval == RSB_ERR_NO_ERROR, so perror reports no error. */
	rsb__do_perror(NULL,errval);
	return NULL;
}
752
rsb_err_t rsb__recheck_insertion(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, const struct rsb_mtx_t *mtxAp, const struct rsb_options_t *o)
{
	/*!
	 * \ingroup gr_internals
	 * This is a very slow debug function.
	 * It should be supplied with some sparse matrix construction arrays in any order, and
	 * a fully constructed matrix structure; it verifies (via the options
	 * bitmap and a byte-wise compare) that every input nonzero landed in
	 * the right block at the right intra-block offset.
	 *
	 * \note: Does not support block column major matrices and sorted ones.
	 *
	 * TODO : should support more matrix formats ( e.g.: block column majer )
	 * FIXME : obsolete, very limited function
	 *
	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
	 * */
	register rsb_blk_idx_t i,j;   /* block coordinates of the current nonzero */
	rsb_nnz_idx_t k;
	rsb_nnz_idx_t missing = 0;    /* blocks expected but absent from the bitmap */
	const rsb_byte_t *moff = NULL;
	const rsb_byte_t *src = NULL;

	if(!mtxAp || !o )return RSB_ERR_BADARGS;
	if(mtxAp->flags & RSB_FLAG_SORT_INPUT) return RSB_ERR_BADARGS;
	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER) return RSB_ERR_UNIMPLEMENTED_YET;
	if(! o->bitmap) return RSB_ERR_UNSUPPORTED_OPERATION;/* when building sorted matrices, we don't create bitmaps .. should we ? */

#if RSB_WANT_BITMAP
	/* clear, then re-set, the per-block bit for every input nonzero */
	for(k=0;k<nnz;++k) { rsb_coo_idx_t iI = IA[k],iJ = JA[k];RSB_BLOCK_UNSET_BIT_FOR_NNZ(&iI,&iJ,k,mtxAp); }
	for(k=0;k<nnz;++k) { RSB_BLOCK_SET_BIT_FOR_NNZ( IA,JA,k,mtxAp); }
#endif /* RSB_WANT_BITMAP */

	/* pass 1: structural check — every nonzero's block must be marked */
	for(k=0;k<nnz;++k)
	{
		i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,mtxAp);
		j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,mtxAp);
		if(!(RSB_BITMAP_GET(o->bitmap,mtxAp->M_b,mtxAp->K_b,i,j))) ++missing;
	}

	if(!missing)
		RSB_STDERR("checking structure : there are no blocks missing.\n");
	else
		RSB_STDERR("checking structure : there are %zd blocks missing\n",(size_t)missing);
	if(missing) return RSB_ERR_GENERIC_ERROR;
	/* pass 2: value check — compare each stored element byte-wise with the input */
	for(k=0;k<nnz;++k)
	{
		i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,mtxAp);
		j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,mtxAp);
		moff = rsb__get_block_address(i,j,mtxAp);
		if(!moff)
		{
			RSB_STDERR("critical block error on block (%d,%d).\n",i,j);
			return RSB_ERR_GENERIC_ERROR;
		}
		else
		{
			moff += RSB_GET_INTRA_BLOCK_OFFSET(IA[k],JA[k],i,j,mtxAp);
			src = VA;
			src += mtxAp->el_size*k;

			if(RSB_MEMCMP(src,moff,mtxAp->el_size))
			{
				/* may give problems when flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER */
				RSB_ERROR("critical error: %d'th nonzero (%d,%d) at (%d,%d) in block (%d,%d) is wrong!\n",
					k,
					IA[k]+1,JA[k]+1,
					RSB_INTRA_BLOCK_ROW(IA[k],i,mtxAp),RSB_INTRA_BLOCK_COLUMN(JA[k],j,mtxAp),i,j);
				/* warning : the following instruction is potentially harmful ! */
#ifdef RSB_DEBUG_BLOCK_STUFF
				RSB_STDERR("should be : 0x%x\n",*(int*)src );
				RSB_STDERR("is : 0x%x\n",*((int*)(moff)));
				/* RSB_STDERR("should be : %g\n",src );
				RSB_STDERR("is : %g\n",*((float*)(moff)));*/
#endif /* RSB_DEBUG_BLOCK_STUFF */
				return RSB_ERR_GENERIC_ERROR;
			}
		}
	}
	return RSB_ERR_NO_ERROR;
}
832
rsb_err_t rsb__do_is_valid_pinfo_t(const struct rsb_mtx_partitioning_info_t * pinfop)
{
	/*!
	 * \ingroup gr_internals
	 * \param pinfop should specify a partitioning info array
	 * \return RSB_ERR_NO_ERROR if the structure is consistent,
	 *         RSB_ERR_GENERIC_ERROR otherwise
	 *
	 * This is a strictly debugging function, whose sole purpose is to verify
	 * the partitioning arrays contents of a rsb_mtx_partitioning_info_t structure:
	 * positive dimensions, positive block counts when the partitioning
	 * arrays are present, monotone last entries, and all boundaries
	 * non-negative and within the matrix dimensions.
	 * */
	rsb_nnz_idx_t k;
	/* dimensions must be at least 1 x 1 */
	if(pinfop->nr<1)
	{
		RSB_PERR_GOTO(err,"m == %d ?\n",pinfop->nr);
	}
	if(pinfop->nc<1)
	{
		RSB_PERR_GOTO(err,"k == %d ?\n",pinfop->nc);
	}
	/* a partitioning array implies a positive block count */
	if(pinfop->rpntr && pinfop->M_b<1)
	{
		RSB_PERR_GOTO(err,"M_b == %d ?\n",pinfop->M_b-1);
	}
	if(pinfop->cpntr && pinfop->K_b<1)
	{
		RSB_PERR_GOTO(err,"K_b == %d ?\n",pinfop->K_b-1);
	}

	if(pinfop->rpntr && pinfop->cpntr )
	{
		/* FIXME */
		/* the final boundary must strictly exceed the previous one */
		if(pinfop->rpntr[pinfop->M_b]<=pinfop->rpntr[pinfop->M_b-1])
		{
			RSB_PERR_GOTO(err,"last (%d) rpntr element is %d <= %d\n",pinfop->M_b,pinfop->rpntr[pinfop->M_b],pinfop->rpntr[pinfop->M_b-1]);
		}
		if(pinfop->cpntr[pinfop->K_b]<=pinfop->cpntr[pinfop->K_b-1])
		{
			RSB_PERR_GOTO(err,"last (%d) cpntr element is %d <= %d\n",pinfop->K_b,pinfop->cpntr[pinfop->K_b],pinfop->cpntr[pinfop->K_b-1]);
		}

		/* all boundaries must be non-negative ... */
		for(k=0;k<pinfop->M_b;++k)if(pinfop->rpntr[k]<0)
		{
			RSB_PERR_GOTO(err,"bad partitioning : rpntr[%d]=%d\n",k,pinfop->rpntr[k]);
		}
		for(k=0;k<pinfop->K_b;++k)if(pinfop->cpntr[k]<0)
		{
			RSB_PERR_GOTO(err,"bad partitioning : cpntr[%d]=%d\n",k,pinfop->cpntr[k]);
		}
		/* ... and must not exceed the matrix dimensions */
		for(k=0;k<pinfop->M_b;++k)if(pinfop->rpntr[k]>pinfop->nr)
		{
			RSB_PERR_GOTO(err,"bad partitioning : rpntr[%d]=%d > m==%d\n",k,pinfop->rpntr[k],pinfop->nr);
		}
		for(k=0;k<pinfop->K_b;++k)if(pinfop->cpntr[k]>pinfop->nc)
		{
			RSB_PERR_GOTO(err,"bad partitioning : cpntr[%d]=%d > k==%d\n",k,pinfop->cpntr[k],pinfop->nc);
		}
	}
	return RSB_ERR_NO_ERROR;
err:
	return RSB_ERR_GENERIC_ERROR;
}
893
rsb_err_t rsb__compute_partial_fillin_for_nnz_fraction(const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz, struct rsb_mtx_partitioning_info_t * pinfop, size_t * element_countp, size_t * block_countp)
{
	/*!
	 * \ingroup gr_internals
	 * Convenience wrapper: estimates fillin for a single nonzero-count
	 * fraction by delegating to rsb__compute_partial_fillin_for_nnz_fractions
	 * with a one-element fraction array.
	 * see rsb__compute_partial_fillin_for_nnz_fractions
	 * */
	return rsb__compute_partial_fillin_for_nnz_fractions(IA,JA,&nnz,1,pinfop,element_countp,block_countp);
}
902
rsb__compute_partial_fillin_for_nnz_fractions(const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t * nnz,const rsb_nnz_idx_t nnzn,struct rsb_mtx_partitioning_info_t * pinfop,size_t * element_countp,size_t * block_countp)903 rsb_err_t rsb__compute_partial_fillin_for_nnz_fractions(const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA,const rsb_nnz_idx_t * nnz, const rsb_nnz_idx_t nnzn, struct rsb_mtx_partitioning_info_t * pinfop, size_t * element_countp, size_t * block_countp)
904 {
905 /*!
906 * \ingroup gr_internals
907 * \param IA is a row indices array sized nnz
908 * \param JA is a column indices array sized nnz
909 * \param nnz is the length of IA and JA
910 * \param element_countp is where the element counts will be written
911 * \param block_countp is where the block counts will be written
912 *
913 * Will estimate fillin for the first nnz[ni]<nnz[ni+1]<...<nnz[nnzn-1] elements, with nnz[nnzn-1] being
914 * less than or equal to the number of elements in the IA, JA, element_countp, block_countp arrays.
915 *
916 * Note: this function performs almost no data validation.
917 * Note : this is not a service but an experimental function, and is very slow.
918 * Note : another wayof structuring thisfunction would beto make it accept a 2*nnzn sized array with lower
919 * and upper segment indices both specified.
920 * This would have been more flexible but would require some change in this function code.
921 * TODO : this is not core functionality, so this function could be moved elsewhere
922 * */
923
924 rsb_bitmap_data_t * bitmap = NULL;
925 size_t element_count = 0;
926 rsb_nnz_idx_t block_count = 0;
927 rsb_nnz_idx_t k = 0,l = 0;/* were -1 */
928 rsb_blk_idx_t i = 0,j = 0;/* were -1 */
929
930 if( !IA || !JA || !pinfop || !element_countp || !block_countp ) goto err;
931 if( nnzn < 1 ) goto err;
932 if( RSB_INVALID_BLK_INDEX(pinfop->M_b) || RSB_INVALID_BLK_INDEX(pinfop->K_b) )goto err;
933 if( !pinfop->rpntr || !pinfop->cpntr )goto err;
934 bitmap = rsb__allocate_bitmap(pinfop->M_b,pinfop->K_b);
935 if(!bitmap)goto err;
936
937 #ifdef RSB_DEBUG_INPUT
938 if(rsb__do_is_valid_pinfo_t(pinfop))
939 {
940 RSB_PERR_GOTO(err,RSB_ERRM_ES);
941 }
942 #endif /* RSB_DEBUG_INPUT */
943
944 if(1)
945 {
946 size_t skip = 0,skipu = 0;//new
947 rsb_nnz_idx_t c = 0;
948 rsb_nnz_idx_t sl;
949
950 skip = 0;
951
952 skipu = (nnz[nnzn-1]/(nnzn*nnzn));
953 skip = nnzn*skipu;
954 for(sl=0;sl<nnzn;++sl)
955 block_countp[sl] = element_countp[sl] = 0;
956
957 #if 1
958 /* An alternative way, much more stable!
959 * However, it underestimates the nnz count so it should in some manner mitigated! */
960 for(l=0;l<nnzn;++l)
961 {
962 for(sl=0;sl<nnzn;++sl)
963 {
964 //RSB_INFO("#i: %d / %d (%d)\n",sl*skip+l*skipu,sl*skip+(l+1)*skipu,nnzn);
965 for(k=sl*skip+l*skipu;k<sl*skip+(l+1)*skipu;++k)
966 {
967
968 ++c;
969 /* NOTE : the following ifelse statements are for situations where m<br or k<bc */
970 if(pinfop->M_b>1)
971 i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,pinfop);
972 else
973 i = 0;
974 if(pinfop->K_b>1)
975 j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,pinfop);
976 else
977 j = 0;
978
979 /* if the bit is not already set */
980 if(!(RSB_BITMAP_GET(bitmap,pinfop->M_b,pinfop->K_b,i,j)))
981 {
982 element_count += GET_BLOCK_SIZE(i,j,pinfop);
983 (block_count)++;
984 RSB_BITMAP_SET(bitmap,pinfop->M_b,pinfop->K_b,i,j) ;
985 }
986 }
987 block_countp[l] = block_count;
988 element_countp[l] = element_count;
989 }
990 }
991 l = nnzn-1;
992 //RSB_INFO("#c: %d / %d (%d).. %d -> %d\n",c,block_countp[l],nnzn,nnzn*skip,nnz[l]);
993 for(k=c;k<nnz[l];++k)
994 //for(k=nnzn*skip;k<nnz[l];++k)
995 {
996 // ++c;
997
998 i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,pinfop);
999 j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,pinfop);
1000
1001 /* if the bit is not already set */
1002 if(!(RSB_BITMAP_GET(bitmap,pinfop->M_b,pinfop->K_b,i,j)))
1003 {
1004 element_countp[l] += GET_BLOCK_SIZE(i,j,pinfop);
1005 block_countp[l]++;
1006 RSB_BITMAP_SET(bitmap,pinfop->M_b,pinfop->K_b,i,j) ;
1007 }
1008 }
1009 //RSB_INFO("#c: %d / %d (%d)\n",c,block_countp[l],nnzn);
1010 #endif
1011 }
1012 else
1013 for(l=0;l<nnzn;++l)
1014 {
1015 rsb_nnz_idx_t li;
1016 if(l==0)
1017 li = 0;
1018 else
1019 li = nnz[l-1];/* will the first loop optimized by the compiler ? :) */
1020
1021 for(k=li;k<nnz[l];++k)
1022 {
1023
1024 i = RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,pinfop);
1025 j = RSB_GET_BLOCK_COL_FOR_NZ(JA+k,pinfop);
1026
1027 /* if the bit is not already set */
1028 if(!(RSB_BITMAP_GET(bitmap,pinfop->M_b,pinfop->K_b,i,j)))
1029 {
1030 element_count += GET_BLOCK_SIZE(i,j,pinfop);
1031 (block_count)++;
1032 RSB_BITMAP_SET(bitmap,pinfop->M_b,pinfop->K_b,i,j) ;
1033 }
1034 }
1035 if(block_countp)
1036 block_countp[l] = block_count;
1037 if(element_countp)
1038 element_countp[l] = element_count;
1039 }
1040
1041 RSB_CONDITIONAL_FREE(bitmap);
1042 return RSB_ERR_NO_ERROR;
1043 err:
1044 return RSB_ERR_ENOMEM;
1045 }
1046
1047 #if RSB_WANT_BITMAP
rsb_element_block_count_and_bitmap_from_coo_partitioning(const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,const struct rsb_mtx_partitioning_info_t * pinfop,size_t * element_countp,rsb_nnz_idx_t * block_countp,rsb_bitmap_data_t ** bitmapp,const rsb_flags_t flags,struct rsb_mtx_t * mtxAp)1048 static rsb_err_t rsb_element_block_count_and_bitmap_from_coo_partitioning(const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop, size_t * element_countp, rsb_nnz_idx_t * block_countp, rsb_bitmap_data_t ** bitmapp, const rsb_flags_t flags, struct rsb_mtx_t * mtxAp)
1049 {
1050 /*!
1051 * \ingroup gr_internals
1052 * \param IA is a row indices array sized nnz
1053 * \param JA is a column indices array sized nnz
1054 * \param nnz is the length of IA and JA
1055 * \param pinfop should specify a partitioning info array
1056 * \param element_countp is where the element counts will be written
1057 * \param block_countp is where the block counts will be written
1058 *
1059 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
1060 *
1061 * WARNING : IA and JA can respectively point to columns and rows arrays instead of rows and columns,
1062 * as long as the pinfop information is swapped accordingly.
1063 * In this way a transposed bitmap will be allocated (note that its size could be the same. guess why..).
1064 *
1065 * In case of error, no bitmap will be allocated, but its pointer may be overwritten.
1066 * In case of success, a bitmap structure will be allocated.
1067 * */
1068
1069 rsb_nnz_idx_t k = 0;
1070 rsb_bitmap_data_t * bitmap = NULL;
1071 size_t element_count = 0;
1072 rsb_nnz_idx_t block_count = 0;
1073 rsb_blk_idx_t mI = 0,MI = 0;
1074 const rsb_coo_idx_t * mIndx = NULL,* MIndx = NULL;
1075
1076 if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
1077 {
1078 mIndx = IA;
1079 MIndx = JA;
1080 }
1081 else
1082 {
1083 mIndx = JA;
1084 MIndx = IA;
1085 }
1086
1087 if( !IA || !JA || !pinfop || !element_countp || !bitmapp || !block_countp ) goto err;
1088 if( RSB_INVALID_NNZ_INDEX(nnz) ) goto err;
1089 if( RSB_INVALID_BLK_INDEX(pinfop->M_b) || RSB_INVALID_BLK_INDEX(pinfop->K_b) )goto err;
1090 if( !pinfop->rpntr || !pinfop->cpntr )goto err;
1091
1092 bitmap = rsb__allocate_bitmap(mtxAp->Mdim,mtxAp->mdim);
1093
1094 if(!bitmap)goto err;
1095
1096 if( mtxAp->flags & RSB_FLAG_SHOULD_DEBUG )
1097 if(rsb__do_is_valid_pinfo_t(pinfop))
1098 {
1099 RSB_PERR_GOTO(err,RSB_ERRM_ES);
1100 }
1101
1102 if(RSB_WANT_VERBOSE_MESSAGES)
1103 RSB_INFO("counting matrix blocks ..\n");
1104
1105 for(k=0;RSB_LIKELY(k<nnz);++k)
1106 {
1107 /*
1108 * We count the amount of elements for each block, setting bits in
1109 * our bitmap where a block should be placed, and leaving unset bits
1110 * which correspond to zero blocks
1111 * */
1112
1113 MI = RSB_GET_BLOCK_MAJ_FOR_NZ(MIndx+k,mtxAp);
1114 mI = RSB_GET_BLOCK_MIN_FOR_NZ(mIndx+k,mtxAp);
1115
1116 RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(MI));
1117 RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(mI));
1118
1119 if(mI>=mtxAp->mdim)
1120 {
1121 RSB_PERR_GOTO(err," j=%d >= o->K_b=%d\n ",mI,mtxAp->mdim);
1122 }
1123 if(mI <0 )
1124 {
1125 RSB_PERR_GOTO(err," j=%d < 0 \n",mI);
1126 }
1127 if(MI>=mtxAp->Mdim)
1128 {
1129 RSB_PERR_GOTO(err," i=%d >= o->M_b=%d\n ",MI,mtxAp->Mdim);
1130 }
1131 if(MI <0 )
1132 {
1133 RSB_PERR_GOTO(err," i=%d < 0 \n",MI);
1134 }
1135
1136 /* if the bit is not already set */
1137 if(!(RSB_BITMAP_GET(bitmap,mtxAp->Mdim,mtxAp->mdim,MI,mI)))
1138 {
1139 if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
1140 element_count += GET_BLOCK_SIZE(mI,MI,pinfop);
1141 else
1142 element_count += GET_BLOCK_SIZE(MI,mI,pinfop);
1143
1144 (block_count)++;
1145 RSB_BITMAP_SET(bitmap,mtxAp->Mdim,mtxAp->mdim,MI,mI) ;
1146 }
1147 }
1148
1149 if(block_count > nnz)
1150 {
1151 RSB_PERR_GOTO(err,"(mtxAp->block_count=%d >= n=%d)!\n",block_count,nnz);
1152 }
1153
1154 *block_countp = block_count;
1155 *element_countp = element_count;
1156 *bitmapp = bitmap;
1157
1158 return RSB_ERR_NO_ERROR;
1159 err:
1160 RSB_CONDITIONAL_FREE(bitmap);
1161 return RSB_ERR_GENERIC_ERROR;
1162 }
1163 #endif /* RSB_WANT_BITMAP */
1164
rsb__do_set_init_storage_flags(struct rsb_mtx_t * mtxAp,rsb_flags_t flags)1165 rsb_err_t rsb__do_set_init_storage_flags(struct rsb_mtx_t *mtxAp, rsb_flags_t flags)
1166 {
1167 rsb_err_t errval = RSB_ERR_NO_ERROR;
1168 rsb_flags_t storage_only_flags = RSB_DO_FLAGS_EXTRACT_STORAGE(flags);
1169
1170 #ifdef RSB_FLAG_WANT_LINKED_STORAGE
1171 if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_LINKED_STORAGE))
1172 {
1173 if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1174 #ifdef RSB_MATRIX_STORAGE_LC
1175 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_LC;
1176 #els /* RSB_MATRIX_STORAGE_LC */e
1177 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1178 #endif /* RSB_MATRIX_STORAGE_LC */
1179 else
1180 #ifdef RSB_MATRIX_STORAGE_LR
1181 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_LR;
1182 #else /* RSB_MATRIX_STORAGE_LR */
1183 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1184 #endif /* RSB_MATRIX_STORAGE_LR */
1185 }
1186 else
1187 #endif /* RSB_FLAG_WANT_LINKED_STORAGE */
1188 {
1189 if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_COO_STORAGE))
1190 {
1191 if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1192 #ifdef RSB_MATRIX_STORAGE_BCOC
1193 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCOC;
1194 #else /* RSB_MATRIX_STORAGE_BCOC */
1195 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1196 #endif /* RSB_MATRIX_STORAGE_BCOC */
1197 else
1198 #ifdef RSB_MATRIX_STORAGE_BCOR
1199 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCOR;
1200 #else /* RSB_MATRIX_STORAGE_BCOR */
1201 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1202 #endif /* RSB_MATRIX_STORAGE_BCOR */
1203 }
1204 else //FIXME: this switch could cohexist with CSS, and be processed later or be ignored (in the old constructor)
1205 if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_COO_STORAGE))
1206 {
1207 if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1208 #ifdef RSB_MATRIX_STORAGE_BCOC
1209 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCOC;
1210 #else /* RSB_MATRIX_STORAGE_BCOC */
1211 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1212 #endif /* RSB_MATRIX_STORAGE_BCOC */
1213 else
1214 #ifdef RSB_MATRIX_STORAGE_BCOR
1215 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCOR;
1216 #else /* RSB_MATRIX_STORAGE_BCOR */
1217 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1218 #endif /* RSB_MATRIX_STORAGE_BCOR */
1219 }
1220 else
1221 if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_BCSS_STORAGE))
1222 {
1223 if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1224 #ifdef RSB_MATRIX_STORAGE_BCSC
1225 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCSC;
1226 #else /* RSB_MATRIX_STORAGE_BCSC */
1227 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1228 #endif /* RSB_MATRIX_STORAGE_BCSC */
1229 else
1230 #ifdef RSB_MATRIX_STORAGE_BCSR
1231 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_BCSR;
1232 #else /* RSB_MATRIX_STORAGE_BCSR */
1233 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1234 #endif /* RSB_MATRIX_STORAGE_BCSR */
1235 }
1236 else
1237 if(RSB_DO_FLAG_HAS(storage_only_flags,RSB_FLAG_WANT_FIXED_BLOCKING_VBR))
1238 {
1239 if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
1240 #ifdef RSB_MATRIX_STORAGE_VBC
1241 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_VBC;
1242 #else /* RSB_MATRIX_STORAGE_VBC */
1243 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1244 #endif /* RSB_MATRIX_STORAGE_VBC */
1245 else
1246 #ifdef RSB_MATRIX_STORAGE_VBR
1247 mtxAp->matrix_storage = RSB_MATRIX_STORAGE_VBR;
1248 #else /* RSB_MATRIX_STORAGE_VBR */
1249 {errval = RSB_ERR_UNSUPPORTED_FORMAT;goto err;}
1250 #endif /* RSB_MATRIX_STORAGE_VBR */
1251 }
1252 else
1253 {
1254 /* undetermined format or a merge of formats (happens on recursive matrices during construction) */
1255 mtxAp->matrix_storage = storage_only_flags;
1256 }
1257 }
1258 err:
1259 RSB_DO_ERR_RETURN(errval)
1260 }
1261
rsb_err_t rsb__set_init_flags_and_stuff( struct rsb_mtx_t *mtxAp, struct rsb_options_t * o, const struct rsb_mtx_partitioning_info_t * pinfop, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_nnz_idx_t nnz, rsb_nnz_idx_t block_count, rsb_nnz_idx_t element_count, rsb_type_t typecode, rsb_flags_t flags )
{
	/**
	 * \ingroup gr_internals
	 *
	 * This inner service function sets some flags and variables during matrix construction.
	 *
	 * \param mtxAp matrix structure whose fields get initialized (required)
	 * \param o options pointer; currently ignored (see comment below)
	 * \param pinfop optional partitioning info; when NULL, m/k are used and a
	 *        1x1 blocking is assumed
	 * \param m,k row/column counts used only when pinfop is NULL
	 * \param nnz,block_count,element_count counters copied verbatim into mtxAp
	 * \param typecode numerical type code (determines el_size)
	 * \param flags construction flags; storage bits are resolved via
	 *        rsb__do_set_init_storage_flags()
	 * \return RSB_ERR_NO_ERROR on success, RSB_ERR_BADARGS/RSB_ERR_INTERNAL_ERROR otherwise.
	 * */
	rsb_err_t errval = RSB_ERR_NO_ERROR;

	if(!mtxAp /*|| !o */ /* FIXME: o disabled lately */
#if !RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
	|| !pinfop
#endif /* RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS */
	)
	{
		errval = RSB_ERR_BADARGS;
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}

	mtxAp->flags = flags;
	mtxAp->typecode =typecode;
	mtxAp->el_size = RSB_NUMERICAL_TYPE_SIZE(typecode);
#if RSB_WANT_BITMAP
	mtxAp->options = NULL; // we ignore o
//	mtxAp->options = o;
#endif /* RSB_WANT_BITMAP */
	mtxAp->nnz = nnz;
	mtxAp->element_count = element_count;
	mtxAp->block_count = block_count;

	/* dimensions and blocking: taken from pinfop when available ... */
	if(pinfop)
	{
		mtxAp->M_b = pinfop->M_b;
		mtxAp->K_b = pinfop->K_b;
		mtxAp->nr = pinfop->nr;
		mtxAp->nc = pinfop->nc;
		mtxAp->rpntr = pinfop->rpntr;
		mtxAp->cpntr = pinfop->cpntr;
//#if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
		mtxAp->br = pinfop->br;
		mtxAp->bc = pinfop->bc;
//#endif /* RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR */
	}
	else
	{
		/* ... otherwise fall back to a trivial 1x1 blocking over m x k */
		mtxAp->M_b = m;
		mtxAp->K_b = k;
		mtxAp->nr = m;
		mtxAp->nc = k;
		mtxAp->rpntr = NULL;
		mtxAp->cpntr = NULL;
//#if RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR
		mtxAp->br = 1;
		mtxAp->bc = 1;
//#endif /* RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR */
		RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(mtxAp->br+1));
		RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(mtxAp->bc+1));
	}
	/* note: assignment inside the condition — el_size is (re)derived from the typecode here */
	if(RSB_IS_INVALID_TYPE_SIZE(mtxAp->el_size = rsb__do_sizeof(mtxAp->typecode)))
	{
		errval = RSB_ERR_INTERNAL_ERROR;
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}

	if((errval = rsb__do_set_init_storage_flags(mtxAp,flags))!=RSB_ERR_NO_ERROR)
	{
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}

	/* with a 1x1 blocking, the block grid degenerates to one block per entry */
	if(mtxAp->br==1 && mtxAp->bc==1)
	{
		mtxAp->M_b = mtxAp->nr;
		mtxAp->K_b = mtxAp->nc;
	}

	/* setting aliases: Mdim/Mpntr always scan the storage-major dimension */
	if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
	{
		mtxAp->Mdim = mtxAp->K_b;
		mtxAp->mdim = mtxAp->M_b;
		mtxAp->mpntr = mtxAp->rpntr;
		mtxAp->Mpntr = mtxAp->cpntr;
	}
	else
	{
		mtxAp->Mdim = mtxAp->M_b;
		mtxAp->mdim = mtxAp->K_b;
		mtxAp->Mpntr = mtxAp->rpntr;
		mtxAp->mpntr = mtxAp->cpntr;
	}

//	RSB_DEBUG_ASSERT(mtxAp->Mdim);
//	RSB_DEBUG_ASSERT(mtxAp->mdim);
//	RSB_DEBUG_ASSERT(mtxAp->rpntr);
//	RSB_DEBUG_ASSERT(mtxAp->cpntr);
	RSB_DEBUG_ASSERT(mtxAp->el_size);

	return RSB_ERR_NO_ERROR;
err:
	RSB_DO_ERR_RETURN(errval)
}
1364
#if 0
/* NOTE(review): dead code, disabled via "#if 0"; kept verbatim per the FIXMEs
 * below, which suggest replacing it with rsb_print_matrix. */
rsb_err_t rsb_dump_matrix ( const struct rsb_mtx_t *mtxAp )
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \param mtxAp is a valid matrix structure pointer
	 * \param diagonal is an array sized as min(mtxAp->nr,mtxAp->nc) which on exit will contain the diagonal elements.
	 * \return -1 in case of error, 0 otherwise
	 *
	 * FIXME : UNTESTED AND UNDOCUMENTED AND UNFINISHED
	 * FIXME : USE rsb_print_matrix and delete this ?
	 * */
	register rsb_nnz_idx_t baserow,basecolumn;
	register rsb_blk_idx_t rows,columns;
	register rsb_blk_idx_t blockrow,blockcolumn;
	register rsb_byte_t *bp;

	RSB_INFO("%% [!] TESTING CODE !\n");
	RSB_INFO("%%rows:%d columns:%d blocks:%d\n",mtxAp->nr,mtxAp->nc,mtxAp->block_count);
	RSB_INFO("%d %d %d\n", mtxAp->nr,mtxAp->nc,mtxAp->nnz);
	RSB_GET_FIRST_BLOCK_POINTER(bp,mtxAp,baserow,basecolumn,rows,columns,blockrow,blockcolumn);
	while(!RSB_GOT_LAST_BLOCK_POINTER(mtxAp))
	{
		rsb_coo_idx_t r,c;
		/*
		 * FIXME
		 * */
//		RSB_INFO("%x \n", bp) ;
//		RSB_INFO("_k : %d %d ", _k,_lastk) ;
//		RSB_INFO("%d %d ", baserow,basecolumn) ;
		RSB_INFO("%d %d\n", rows,columns) ;
#if 1
		/* dump each block entry as "row col value" (assumes double values) */
		for(r=0;r<rows;++r)
		for(c=0;c<columns;++c)
		{
			RSB_INFO("%d %d %lg\n", baserow+r,basecolumn+c,((double*)bp)[columns*r+c]) ;
		}
#endif
		RSB_GET_NEXT_BLOCK_POINTER(bp,mtxAp,baserow,basecolumn,rows,columns,blockrow,blockcolumn);
	}

	return RSB_ERR_NO_ERROR;
}
#endif
1410
rsb_err_t rsb__do_insert_sorted( struct rsb_mtx_t * mtxAp, const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop)
{
	/*!
	 * \ingroup gr_internals
	 * Inserts in the matrix structures the specified coo elements, sorted accordingly to the specified BCSR blocking.
	 *
	 * \param mtxAp matrix structure with bindx, indptr and VA already allocated
	 * \param VA numerical values array sized nnz (in the matrix' numerical type)
	 * \param IA,JA row/column index arrays sized nnz, sorted block-wise
	 * \param nnz number of nonzeroes to insert
	 * \param pinfop partitioning info (rpntr/cpntr block boundary arrays)
	 *
	 * The scan walks the sorted nonzeroes once; k indexes nonzeroes, K indexes
	 * blocks; (MI,mI) are the current block coordinates along the major/minor
	 * storage dimensions, (baserow,basecolumn) the block's first row/column.
	 *
	 * NOTE(review): the initial skip loops below read MIndx[0]/mIndx[0] before
	 * any nnz>0 check — presumably callers guarantee nnz>0; TODO confirm.
	 */

	rsb_coo_idx_t blockrows = 0;
	rsb_coo_idx_t blockcolumns = 0;
	rsb_coo_idx_t baserow = 0;
	rsb_coo_idx_t basecolumn = 0;
	rsb_nnz_idx_t *indptr = mtxAp->indptr;
	const rsb_coo_idx_t *Mpntr = NULL;
	const rsb_coo_idx_t *mpntr = NULL;
	const rsb_coo_idx_t *MIndx = NULL;
	const rsb_coo_idx_t *mIndx = NULL;
	rsb_blk_idx_t mI = 0,MI = 0;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_nnz_idx_t k = 0;	/* will index a nnz sized array */
	rsb_nnz_idx_t K = 0;
	rsb_byte_t*dst = NULL;
	rsb_byte_t*src = NULL;

	/* major/minor aliases depend on the storage order */
	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
	{
		mpntr = pinfop->rpntr;
		Mpntr = pinfop->cpntr;
		mIndx = IA;
		MIndx = JA;
	}
	else
	{
		Mpntr = pinfop->rpntr;
		mpntr = pinfop->cpntr;
		MIndx = IA;
		mIndx = JA;
	}

	/* locate the block containing the first nonzero and open block 0 */
	k = mI = MI = 0;K = 0;
	blockrows = Mpntr[MI+1] - Mpntr[MI];
	blockcolumns = mpntr[mI+1] - mpntr[mI];
	while( MIndx[k] >= Mpntr[MI+1] )++MI;	/* skipping 'preceding' block rows .. */
	while( mIndx[k] >= mpntr[mI+1] )++mI;	/* skipping 'preceding' block columns .. */
	baserow = Mpntr[MI];
	basecolumn = mpntr[mI];
	mtxAp->bindx [ K ] = mI;			/* a 'new' block */
	indptr[ K+1 ]=indptr[ K ]  + blockrows * blockcolumns;

#ifdef RSB_FLAG_WANT_LINKED_STORAGE
	if( rsb__have_linked_storage(mtxAp->flags) )
	{
		if(RSB_WANT_VERBOSE_MESSAGES)
			RSB_INFO("initializing linked lists stuff.\n");
		if(RSB_UNLIKELY(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
			RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),mI,MI,blockcolumns,blockrows,basecolumn,baserow)
		else
			RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),MI,mI,blockrows,blockcolumns,baserow,basecolumn)
	}
#endif /* RSB_FLAG_WANT_LINKED_STORAGE */

	/*
	dst = mtxAp->VA;
	dst += RSB_BLOCK_OFFSET(mtxAp,K);
	{rsb_blk_idx_t ibo = 0;
	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
		ibo = RSB_GET_INTRA_BLOCK_OFFSET(mIndx[k],MIndx[k],mI,MI,mtxAp) ;
	else
		ibo = RSB_GET_INTRA_BLOCK_OFFSET(MIndx[k],mIndx[k],MI,mI,mtxAp) ;
	dst += ibo;}
	src = ((rsb_byte_t*)VA) + mtxAp->el_size * k;
	RSB_NUMERICAL_TYPE_SET_ELEMENT(dst,src,mtxAp->typecode);*/

	/* main scan: advance block coordinates as needed, then copy the element */
	while(RSB_LIKELY(k<nnz))
	{
#ifdef DEBUG
		if( MIndx[k] < baserow )
		{
			RSB_ERROR("k=%d : (%d %d) is not ok\n",k, MIndx[k]+1,mIndx[k]+1);
			RSB_STDERR("(minor dim. index %d < base row %d)\n",MIndx[k] , baserow);
			errval = RSB_ERR_INTERNAL_ERROR;
			goto err;/* NOTE : this jump could be evil */
		}
#endif /* DEBUG */

		if( mIndx[k] >= basecolumn+blockcolumns )
		{
			/* new block column, for sure */
			while( mIndx[k] >= mpntr[mI+1] )++mI;
			blockcolumns = mpntr[mI+1] - mpntr[mI];
			basecolumn = mpntr[mI];

			if( MIndx[k] >= baserow+blockrows )
			{
				/* new block row  */
				while( MIndx[k] >= Mpntr[MI+1] )++MI;
				blockrows    = Mpntr[MI+1] - Mpntr[MI];
				baserow = Mpntr[MI];
			}
			else
			{
				/* same block row  */
			}
			++K;
			mtxAp->bindx [ K ] = mI;			/* a 'new' block */
			indptr[ K+1 ] = indptr[ K ]  + blockrows * blockcolumns;
#ifdef RSB_FLAG_WANT_LINKED_STORAGE
			if( rsb__have_linked_storage(mtxAp->flags) )
			{
			if(RSB_UNLIKELY(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
				RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),mI,MI,blockcolumns,blockrows,basecolumn,baserow)
			else
				RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),MI,mI,blockrows,blockcolumns,baserow,basecolumn)
			}
#endif /* RSB_FLAG_WANT_LINKED_STORAGE */
		}
		else
		if( MIndx[k] >= baserow+blockrows )
		{
			/* new row block, for sure */
			while( MIndx[k] >= Mpntr[MI+1] )++MI;
			blockrows    = Mpntr[MI+1] - Mpntr[MI];
			baserow = Mpntr[MI];

			if( mIndx[k] < basecolumn )
			{
				/* new row block, new block column  */
				mI = 0;
				while( mIndx[k] >= mpntr[mI+1] )++mI;
				blockcolumns = mpntr[mI+1] - mpntr[mI];
				basecolumn = mpntr[mI];
			}
			else
			{
				/* new row block, same column  */
			}
			++K;
			mtxAp->bindx [ K ] = mI;			/* a 'new' block */
			indptr[ K+1 ] = indptr[ K ]  + blockrows * blockcolumns;
#ifdef RSB_FLAG_WANT_LINKED_STORAGE
			if( rsb__have_linked_storage(mtxAp->flags) )
			{
			if(RSB_UNLIKELY(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
				RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),mI,MI,blockcolumns,blockrows,basecolumn,baserow)
			else
				RSB_BLOCK_TRAILING_STRUCT_SET(RSB_BLOCK_TRAILING_STRUCT_GET(mtxAp,K),MI,mI,blockrows,blockcolumns,baserow,basecolumn)
			}
#endif /* RSB_FLAG_WANT_LINKED_STORAGE */
		}
		else
		{
			/* same block row for sure */
		}
		/* copy element k into its intra-block position inside block K */
		dst = mtxAp->VA;
		dst += RSB_BLOCK_OFFSET(mtxAp,K);
		{
			rsb_nnz_idx_t ibo = 0;
			if(RSB_UNLIKELY(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER))
				ibo = RSB_GET_INTRA_BLOCK_OFFSET(mIndx[k],MIndx[k],mI,MI,mtxAp) ;
			else
				ibo = RSB_GET_INTRA_BLOCK_OFFSET(MIndx[k],mIndx[k],MI,mI,mtxAp) ;
			dst += ibo;
		}
		//RSB_ERROR("%d %d %d\n",((rsb_byte_t*)dst)-((rsb_byte_t*)mtxAp->VA),MIndx[k],mIndx[k]);
		src = ((rsb_byte_t*)VA) + mtxAp->el_size * k;
		RSB_NUMERICAL_TYPE_SET_ELEMENT(dst,src,mtxAp->typecode);
		++k;
	}

	if(nnz)++K;	/* K becomes the total block count */
	mtxAp->bindx[K] = 0;	// the first element off the 'working' bindx should be set to a safe value

	if(mtxAp->flags & RSB_FLAG_SHOULD_DEBUG)
		if( K != mtxAp->block_count )
		{
			RSB_ERROR("K is %zd ! should be %zd (block count)!\n",(size_t)K,(size_t)mtxAp->block_count);
			RSB_STDERR("nnz : %zd\n",(size_t)nnz);
			RSB_STDERR("k : %zd\n",(size_t)k);
			errval = RSB_ERR_INTERNAL_ERROR;
			goto err;
		}
err:
	RSB_DO_ERR_RETURN(errval)
}
1594
rsb_err_t rsb__do_account_sorted( struct rsb_mtx_t * mtxAp, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop, rsb_nnz_idx_t * elements_per_block_row, rsb_nnz_idx_t * blocks_per_block_row)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * The routine for inserting BCSR sorted coo elements in a fresh matrix.
	 * It is not optimized, and it should be used as a debug resort when tuning optimized ones.
	 *
	 * \param mtxAp matrix structure (supplies flags only)
	 * \param IA,JA row/column index arrays sized nnz, sorted block-wise
	 * \param nnz number of nonzeroes to account
	 * \param pinfop partitioning info (rpntr/cpntr block boundary arrays)
	 * \param elements_per_block_row output: element counts (see note on [MI*0] below)
	 * \param blocks_per_block_row output: per-block-row block counts
	 *
	 * FIXME : this code is deprecated in favour of rsb__do_account_sorted_optimized
	 * FIXME : does not support lots of flags!
	 *
	 * NOTE(review): the initial skip loops read MIndx[0]/mIndx[0] before any
	 * nnz>0 check — presumably callers guarantee nnz>0; TODO confirm.
	 */
	rsb_coo_idx_t blockrows = 0;
	rsb_coo_idx_t blockcolumns = 0;
	rsb_coo_idx_t baserow = 0;
	rsb_coo_idx_t basecolumn = 0;
	const rsb_coo_idx_t *Mpntr = NULL;
	const rsb_coo_idx_t *mpntr = NULL;
	const rsb_coo_idx_t *MIndx = NULL;
	const rsb_coo_idx_t *mIndx = NULL;
	rsb_blk_idx_t mI = 0,MI = 0;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_nnz_idx_t k = 0;	/* will index a nnz sized array */
	rsb_nnz_idx_t K = 0;	/* running block count */
	k = mI = MI = K=0;

	if( ! IA || ! JA
#if !RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
	|| !pinfop
#endif /* RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS */
	)
	{
		errval = RSB_ERR_BADARGS;goto err;
	}

	/* major/minor aliases depend on the storage order */
	if(mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
	{
		mpntr = pinfop->rpntr;
		Mpntr = pinfop->cpntr;
		mIndx = IA;
		MIndx = JA;
	}
	else
	{
		Mpntr = pinfop->rpntr;
		mpntr = pinfop->cpntr;
		MIndx = IA;
		mIndx = JA;
	}

	/* locate the block containing the first nonzero and account it */
	while( MIndx[k] >= Mpntr[MI+1] )++MI;	/* skipping 'preceding' block rows .. */
	while( mIndx[k] >= mpntr[mI+1] )++mI;	/* skipping 'preceding' block columns .. */
	blockrows    = Mpntr[MI+1] - Mpntr[MI];
	blockcolumns = mpntr[mI+1] - mpntr[mI];
	baserow = Mpntr[MI];
	basecolumn = mpntr[mI];
	/* NOTE(review): [MI*0] always writes slot 0, i.e. the element total is
	 * accumulated in a single cell — looks deliberate (see the 'get rid of
	 * this var' comment below), but differs from the per-row block counts. */
	elements_per_block_row[MI*0] += blockrows * blockcolumns;
	blocks_per_block_row[MI] += 1;

	/* main scan: advance block coordinates as needed and count new blocks */
	while(RSB_LIKELY(k<nnz))
	{
#ifdef DEBUG
		if( MIndx[k] < baserow  )
		{
			RSB_ERROR("k=%d : (%d %d) is not ok\n",k, MIndx[k]+1,mIndx[k]+1);
			RSB_STDERR("(minor dim. index %d < base row %d)\n",MIndx[k] , baserow);
			errval = RSB_ERR_INTERNAL_ERROR;
			goto err;
		}
#endif /* DEBUG */

		if( mIndx[k] >= basecolumn+blockcolumns  )
		{
			/* new block column, for sure */
			while( mIndx[k] >= mpntr[mI+1] )++mI;
			blockcolumns = mpntr[mI+1] - mpntr[mI];
			basecolumn = mpntr[mI];

			if( MIndx[k] >= baserow+blockrows  )
			{
				/* new block row  */
				while( MIndx[k] >= Mpntr[MI+1] )++MI;
				blockrows    = Mpntr[MI+1] - Mpntr[MI];
				baserow = Mpntr[MI];
			}
			else
			{
				/* same block row  */
			}
			elements_per_block_row[MI*0] += blockrows * blockcolumns;
			blocks_per_block_row[MI] += 1;
			++K;
		}
		else
		if( MIndx[k] >= baserow+blockrows  )
		{
			/* new row block, for sure */
			while( MIndx[k] >= Mpntr[MI+1] )++MI;
			blockrows    = Mpntr[MI+1] - Mpntr[MI];
			baserow = Mpntr[MI];

			if( mIndx[k] < basecolumn  )
			{
				/* new row block, new block column  */
				mI = 0;
				while( mIndx[k] >= mpntr[mI+1] )++mI;
				blockcolumns = mpntr[mI+1] - mpntr[mI];
				basecolumn = mpntr[mI];
			}
			else
			{
				/* new row block, same column  */
			}
			/* get rid of this var : elements_per_block_row */
			elements_per_block_row[MI*0] += blockrows * blockcolumns;
			blocks_per_block_row[MI] += 1;
			++K;
		}
		else
		{
			/* same block row for sure */
		}
		++k;
	}
err:
	RSB_DO_ERR_RETURN(errval)
}
1721
struct rsb_mtx_t * rsb__allocate_css_from_coo_sorted( void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop, rsb_coo_idx_t m, rsb_coo_idx_t k, struct rsb_options_t * o, rsb_type_t typecode, rsb_flags_t flags, rsb_err_t *errvalp)
{
	/*!
	 * \ingroup gr_internals
	 * The routine for matrix building from sorted coo elements. CSR only.
	 * Builds in place: IA/JA/VA are adopted by the returned matrix (IA becomes
	 * bpntr, JA becomes bindx, VA is kept as the values array).
	 *
	 * \param VA,IA,JA sorted coo arrays, sized nnz (IA assumed sized >= Mdim+1
	 *        for the in-place row pointer construction — TODO confirm with callers)
	 * \param pinfop ignored (reset to NULL below — see FIXME)
	 * \param m,k matrix dimensions
	 * \param o options (required)
	 * \param errvalp required; receives the error code on failure
	 * \return the new matrix, or NULL on error
	 *
	 * FIXME : EXPERIMENTAL, UNFINISHED
	 * FIXME : FIX THIS FUNCTION TO ALLOCATE CSR/CSC EVEN IF NOT IN PLACE
	 * */
	rsb_nnz_idx_t * elements_per_block_row = NULL;
	rsb_time_t t = RSB_TIME_ZERO;
	struct rsb_mtx_t *mtxAp = NULL;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_blk_idx_t MI = 0;

	if(!errvalp)
		return NULL;

	if(!( flags & RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR ))
	{
		return NULL;
	}
	if(flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
		return NULL;/* FIXME : only csr now */

	pinfop = NULL;/* FIXME */

	if(!o) {errval = RSB_ERR_BADARGS;goto err;}
	rsb__init_struct(mtxAp = rsb__calloc(sizeof(*mtxAp)));
	if(!mtxAp){errval = RSB_ERR_ENOMEM;goto err;}
	if((errval = rsb__set_init_flags_and_stuff(mtxAp,o,pinfop,m,k,0,0,0,typecode,flags))!=RSB_ERR_NO_ERROR)goto err;

	elements_per_block_row = rsb__calloc(sizeof(rsb_nnz_idx_t)*(1+mtxAp->Mdim));

	if(!elements_per_block_row)
	{
		errval = RSB_ERR_ENOMEM;
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}
	t = - rsb_time();

	RSB_DEBUG_ASSERT(rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,nnz,typecode,pinfop,flags)==RSB_ERR_NO_ERROR);
	RSB_DEBUG_ASSERT(rsb__util_are_valid_coo_arrays(IA,JA,nnz)==RSB_ERR_NO_ERROR);

	errval = rsb__do_account_sorted_optimized(mtxAp,IA,JA,m,k,nnz,NULL,elements_per_block_row,NULL);
	/* BUGFIX: the accounting pass' error code was previously ignored and then
	 * silently overwritten by the insertion pass below. */
	if(RSB_SOME_ERROR(errval))
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	/* with 1x1 CSR blocking, each nonzero is a block (a dead `= 0' assignment
	 * immediately preceding this was removed) */
	mtxAp->block_count = nnz;

	t += rsb_time();
	mtxAp->sat = t;	/* structure analysis time */

	mtxAp->indptr = rsb__malloc(sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1));
	mtxAp->bindx = JA; /* ok, done :)  FIXME : should type - convert ... */

	if(!mtxAp->bindx ){ errval = RSB_ERR_ENOMEM; goto err;}
	if(!mtxAp->indptr){ errval = RSB_ERR_ENOMEM; goto err;}

	mtxAp->indptr[0] = 0;/* */

	/* IA is recycled in place as the row pointer array (prefix sums) */
	mtxAp->bpntr = IA;
	mtxAp->bpntr [0] = 0;
	for(MI=0;MI<mtxAp->Mdim;++MI) mtxAp->bpntr[MI+1]= mtxAp->bpntr[MI]+ elements_per_block_row[MI];

#if RSB_WANT_BITMAP
	mtxAp->options = o ;
#endif /* RSB_WANT_BITMAP */
	mtxAp->nnz = nnz;
	mtxAp->element_count = nnz;
	mtxAp->VA = VA;
	t = - rsb_time();
	errval = rsb__do_insert_sorted_optimized(mtxAp,VA,IA,JA,nnz,NULL);
	if(RSB_SOME_ERROR(errval))
		goto err;
	t += rsb_time();
	mtxAp->eit = t;	/* element insertion time */
	RSB_CONDITIONAL_FREE(elements_per_block_row);
	return mtxAp;
err:
	/* BUGFIX: the message used to name the wrong function (rsb__allocate_from_coo_sorted) */
	RSB_STDERR("rsb__allocate_css_from_coo_sorted:\n");
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	if(mtxAp)
		rsb__do_mtx_free(mtxAp); /* destroys all of the internals of matrix */
	RSB_CONDITIONAL_FREE(elements_per_block_row);
	return NULL;
}
1816
rsb__allocate_from_coo_sorted(const void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,const struct rsb_mtx_partitioning_info_t * pinfop,rsb_coo_idx_t m,rsb_coo_idx_t k,struct rsb_options_t * o,rsb_type_t typecode,rsb_flags_t flags,rsb_err_t * errvalp)1817 struct rsb_mtx_t * rsb__allocate_from_coo_sorted( const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, const struct rsb_mtx_partitioning_info_t * pinfop, rsb_coo_idx_t m, rsb_coo_idx_t k, struct rsb_options_t * o, rsb_type_t typecode, rsb_flags_t flags, rsb_err_t *errvalp)
1818 {
1819 /*!
1820 * \ingroup gr_internals
1821 * The routine for matrix building from sorted coo elements.
1822 * This function requires the coefficients to be sorted accordingly to the inter block ordering policy.
1823 *
1824 * \param pinfop is the pointer to a rsb_mtx_partitioning_info_t structure with partitioning information.
1825 *
1826 * \note : should behave well with the flags: RSB_FLAG_WANT_FIXED_BLOCKING_VBR RSB_FLAG_SORTED_INPUT
1827 * \note : this function should be optimized and tested thoroughly.
1828 * */
1829
1830 struct rsb_mtx_t *mtxAp = NULL;
1831 rsb_blk_idx_t MI = 0;
1832 rsb_err_t errval = RSB_ERR_NO_ERROR;
1833 /* rsb_coo_idx_t blockcolumns = 0; */
1834
1835 const rsb_coo_idx_t *MIndx = NULL;
1836
1837 rsb_nnz_idx_t * elements_per_block_row = NULL;
1838 rsb_nnz_idx_t * blocks_per_block_row = NULL; /* per major dimension .. */
1839 size_t element_count = 0;
1840
1841 rsb_time_t t = RSB_TIME_ZERO;
1842
1843 #if !RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS
1844 if(!pinfop)
1845 {
1846 errval = RSB_ERR_BADARGS;goto err;
1847 }
1848 #endif /* RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS */
1849 if( flags & RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR )
1850 {errval = RSB_ERR_BADARGS;goto err;}
1851
1852 // if(!o)
1853 // {
1854 // errval = RSB_ERR_BADARGS;goto err;
1855 // }
1856
1857 blocks_per_block_row = NULL; /* per major dimension .. */
1858 element_count = 0;
1859
1860 rsb__init_struct(mtxAp = rsb__calloc(sizeof(struct rsb_mtx_t)));
1861 if(!mtxAp){errval = RSB_ERR_ENOMEM;goto err;}
1862
1863 if((errval = rsb__set_init_flags_and_stuff(mtxAp,o,pinfop,m,k,0,0,0,typecode,flags))!=RSB_ERR_NO_ERROR)
1864 goto err;
1865 if(flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
1866 MIndx = JA;
1867 else
1868 MIndx = IA;
1869
1870 /* FIXME : elements_per_block_row can be replaced with a single variable */
1871 elements_per_block_row = rsb__calloc(sizeof(rsb_nnz_idx_t)*(1+mtxAp->Mdim));
1872 blocks_per_block_row = rsb__calloc(sizeof(rsb_nnz_idx_t)*(1+mtxAp->Mdim));
1873
1874 if(!blocks_per_block_row )
1875 {
1876 errval = RSB_ERR_ENOMEM;
1877 RSB_PERR_GOTO(err,RSB_ERRM_ES);
1878 }
1879 if(!elements_per_block_row)
1880 {
1881 errval = RSB_ERR_ENOMEM;
1882 RSB_PERR_GOTO(err,RSB_ERRM_ES);
1883 }
1884
1885 t = - rsb_time();
1886 blocks_per_block_row++;/* we increment the pointer for 1 element (we will use this array as bpntr later)*/
1887 errval = rsb__do_account_sorted_optimized(mtxAp,IA,JA,m,k,nnz,pinfop,elements_per_block_row,blocks_per_block_row);
1888 if(RSB_SOME_ERROR(errval))
1889 goto err;
1890
1891 if(nnz==0)blocks_per_block_row[0] = 0;/* handling the degenerate nnz == 0 case (e.g.: unit diag) */
1892 mtxAp->block_count = 0;
1893 element_count = 0;
1894
1895 for(MI=0;MI<mtxAp->Mdim;++MI)mtxAp->block_count += blocks_per_block_row [MI];
1896 for(MI=0;MI<mtxAp->Mdim;++MI)element_count += elements_per_block_row[MI];
1897
1898 t += rsb_time();
1899 mtxAp->sat = t;
1900 if(RSB_WANT_VERBOSE_MESSAGES)
1901 RSB_STDERR("matrix creation phase 1 (accounting) : %lf seconds \n", t);
1902
1903 mtxAp->indptr = rsb__malloc(sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1));
1904 mtxAp->bindx = rsb__malloc(sizeof(rsb_nnz_idx_t)*(mtxAp->block_count+1));
1905
1906 if(!mtxAp->bindx ){ errval = RSB_ERR_ENOMEM; goto err;}
1907 if(!mtxAp->indptr){ errval = RSB_ERR_ENOMEM; goto err;}
1908
1909 mtxAp->indptr[0] = 0;/* */
1910
1911 mtxAp->bpntr = (--blocks_per_block_row); /* :) */
1912 for(MI=0;MI<mtxAp->Mdim;++MI)
1913 mtxAp->bpntr[MI+1] += mtxAp->bpntr[MI]; /* in this way bpntr[i] has the count of blocks before row i */
1914 mtxAp->bpntr [0] = 0;
1915 blocks_per_block_row = NULL; /* it will be freed with the matrix */
1916
1917 /* second pass : we have allocated the needed arrays and are ready to fill in data structures */
1918
1919 #if RSB_WANT_BITMAP
1920 mtxAp->options = o ;
1921 #endif /* RSB_WANT_BITMAP */
1922 mtxAp->nnz = nnz;
1923 mtxAp->element_count = element_count;
1924 //mtxAp->block_count = block_count;
1925
1926 if(mtxAp->block_count > nnz)
1927 {
1928 errval = RSB_ERR_INTERNAL_ERROR;
1929 RSB_STDERR("more blocks (%zd) than nonzeros (%zd) ?could be a bug!\n",(size_t)mtxAp->block_count,(size_t)nnz);
1930 goto err;
1931 }
1932
1933 mtxAp->bpntr[0] = 0;
1934 mtxAp->VA = rsb__malloc( RSB_TOTAL_BLOCK_BYTES(mtxAp,o));
1935
1936 if(RSB_WANT_VERBOSE_MESSAGES)
1937 RSB_INFO("allocating %zd bytes.\n",(size_t)RSB_TOTAL_BLOCK_BYTES(mtxAp,o) );
1938
1939 if(!mtxAp->VA)
1940 {
1941 errval = RSB_ERR_ENOMEM;
1942 RSB_STDERR("had problems allocating %zd bytes.\n",(size_t)RSB_TOTAL_BLOCK_BYTES(mtxAp,o));
1943 goto err;
1944 }
1945 // k = 0;/* nnz index */
1946
1947 t = - rsb_time();
1948
1949 /* the following code could run parallel with some work */
1950 errval = rsb__do_insert_sorted_optimized( mtxAp, VA, IA, JA, nnz, pinfop);
1951
1952 if(RSB_SOME_ERROR(errval))
1953 goto err;
1954 t += rsb_time();
1955 mtxAp->eit = t;
1956 if(RSB_WANT_VERBOSE_MESSAGES)
1957 RSB_STDERR("matrix creation phase 2 (insertion) : %lf seconds \n", mtxAp->eit);
1958 #if 0
1959 if((flags & RSB_FLAG_SHOULD_DEBUG) && 0)
1960 {
1961 register rsb_coo_idx_t baserow,basecolumn,rows,columns;
1962 register rsb_blk_idx_t blockrow,blockcolumn;
1963 register rsb_byte_t*bp;
1964
1965 /* FIXME : will fail if pure bcsr */
1966 RSB_GET_FIRST_BLOCK_POINTER(bp,mtxAp,baserow,basecolumn,rows,columns,blockrow,blockcolumn);
1967 if(0 /* super paranoia */)
1968 while(!RSB_GOT_LAST_BLOCK_POINTER(mtxAp))
1969 {
1970 RSB_INFO("%zd / %zd ; block (%zd %zd)/(%zd %zd) base : (%zd %zd) size : (%zd %zd)\n",
1971 (rsb_printf_int_t)_lastk,(rsb_printf_int_t)mtxAp->block_count,
1972 (rsb_printf_int_t)blockrow,(rsb_printf_int_t)blockcolumns,
1973 (rsb_printf_int_t)pinfop->M_b,(rsb_printf_int_t)pinfop->K_b,
1974 (rsb_printf_int_t)baserow,(rsb_printf_int_t)basecolumn,(rsb_printf_int_t)rows,(rsb_printf_int_t)columns);
1975 RSB_GET_NEXT_BLOCK_POINTER(bp,mtxAp,baserow,basecolumn,rows,columns,blockrow,blockcolumn);
1976 }
1977 }
1978 #endif
1979
1980 RSB_CONDITIONAL_FREE(elements_per_block_row);
1981 return mtxAp;
1982 err:
1983 RSB_STDERR("rsb__allocate_from_coo_sorted:\n");
1984 RSB_CONDITIONAL_ERRPSET(errvalp,errval);
1985 if(mtxAp)
1986 rsb__do_mtx_free(mtxAp); /* destroys all of the internals of matrix */
1987 if(blocks_per_block_row != MIndx )
1988 RSB_CONDITIONAL_FREE(blocks_per_block_row );
1989 RSB_CONDITIONAL_FREE(elements_per_block_row);
1990 return NULL;
1991 }
1992
rsb__do_get_blocking_from_pinfo(const struct rsb_mtx_partitioning_info_t * pinfop,rsb_flags_t flags,rsb_blk_idx_t * mbp,rsb_blk_idx_t * kbp)1993 rsb_err_t rsb__do_get_blocking_from_pinfo(const struct rsb_mtx_partitioning_info_t * pinfop, rsb_flags_t flags, rsb_blk_idx_t *mbp, rsb_blk_idx_t *kbp)
1994 {
1995 if( ( flags & RSB_FLAG_WANT_BCSS_STORAGE ) || ( flags & RSB_FLAG_WANT_FIXED_BLOCKING_VBR ) )
1996 {
1997 if( pinfop && pinfop->cpntr && pinfop->rpntr )
1998 {
1999 /* FIXME : experimental */
2000 *kbp = pinfop->cpntr[1]-pinfop->cpntr[0];
2001 *mbp = pinfop->rpntr[1]-pinfop->rpntr[0];
2002 }
2003 else
2004 if(pinfop)
2005 {
2006 #if RSB_EXPERIMENTAL_USE_PURE_BCSS
2007 *mbp = pinfop->br;
2008 *kbp = pinfop->bc;
2009 #else /* RSB_EXPERIMENTAL_USE_PURE_BCSS */
2010 *kbp = *mbp = -1;
2011 #endif /* RSB_EXPERIMENTAL_USE_PURE_BCSS */
2012 }
2013 else
2014 {
2015 *kbp = *mbp = 1;
2016 }
2017 RSB_DEBUG_ASSERT(*kbp>=1);
2018 RSB_DEBUG_ASSERT(*mbp>=1);
2019 if( *kbp<1 || *mbp <1 )
2020 {
2021 return RSB_ERR_BADARGS;
2022 }
2023 }
2024 return RSB_ERR_NO_ERROR;
2025 }
2026
size_t rsb__util_strlen(const rsb_char_t *s)
{
	/*!
	 * \ingroup gr_internals
	 * Thin wrapper around strlen(); s must be a NUL-terminated string.
	 */
	const size_t len = strlen(s);/* Flawfinder: ignore */

	return len;
}
2034
2035 #if 0
2036 static int rsb_util_sprintf(rsb_char_t *str, const rsb_char_t *format, ...)
2037 {
2038 /*!
2039 * \ingroup gr_internals
2040 * FIXME : BUGGY
2041 * */
2042 va_list ap;
2043 rsb_err_t errval = RSB_ERR_NO_ERROR;
2044 va_start(ap,format);
2045 errval = rsb__sprintf(str,format,ap);/* Flawfinder: ignore */
2046 va_end(ap);
2047 RSB_DO_ERR_RETURN(errval)
2048 }
2049 #endif
2050
rsb_util_strcat(rsb_char_t * dest,const rsb_char_t * src)2051 static rsb_char_t *rsb_util_strcat(rsb_char_t *dest, const rsb_char_t *src)
2052 {
2053 /*!
2054 * A wrapper.
2055 * \ingroup gr_internals
2056 */
2057 return strcat(dest,src); /* Flawfinder: ignore */
2058 }
2059
rsb__sprint_matrix_implementation_code2(const struct rsb_mtx_t * mtxAp,rsb_char_t * buf,rsb_flags_t inflags)2060 const rsb_char_t * rsb__sprint_matrix_implementation_code2(const struct rsb_mtx_t *mtxAp, rsb_char_t * buf, rsb_flags_t inflags)
2061 {
2062 /*!
2063 * \ingroup gr_internals
2064 * FIXME : missing error handling
2065 * buf be at least RSB_CONST_MATRIX_IMPLEMENTATION_CODE_STRING_MAX_LENGTH chars long.
2066 */
2067 const rsb_char_t sep[] = "\t";
2068 rsb_blk_idx_t br,bc;
2069 if(!mtxAp) return NULL;
2070 buf[0] = '\0';
2071
2072 rsb__get_blocking_size(mtxAp,&br,&bc);
2073
2074 /* NOTE : assumes BCSR or takes into account only the first blocks */
2075 rsb__sprintf(buf+rsb__util_strlen(buf),"%ld%s%ld%s",(long)mtxAp->nr,sep,(long)mtxAp->nc,sep);
2076 rsb__sprintf(buf+rsb__util_strlen(buf),"%ld%s%ld%s",(long)br,sep,(long)bc,sep);
2077 rsb__sprintf(buf+rsb__util_strlen(buf),"%zd%s%lg",(size_t)rsb__do_get_matrix_nnz(mtxAp),sep,rsb__do_get_matrix_fillin(mtxAp));
2078
2079 return buf;
2080 }
2081
rsb__do_get_symmetry_char(const struct rsb_mtx_t * mtxAp)2082 rsb_char_t rsb__do_get_symmetry_char(const struct rsb_mtx_t *mtxAp)
2083 {
2084 /*!
2085 * \ingroup gr_internals
2086 */
2087 if(rsb__is_symmetric(mtxAp))
2088 return 'S';
2089 else
2090 if(rsb__is_hermitian(mtxAp))
2091 return 'H';
2092 else
2093 return 'G';
2094 }
2095
rsb_do_get_symmetry_string(const struct rsb_mtx_t * mtxAp,rsb_char_t * auxbuf)2096 static const rsb_char_t * rsb_do_get_symmetry_string(const struct rsb_mtx_t *mtxAp, rsb_char_t * auxbuf)
2097 {
2098 /*!
2099 * \ingroup gr_internals
2100 */
2101 const rsb_char_t * s = "Symmetric";
2102 const rsb_char_t * g = "General";
2103 const rsb_char_t * h = "Hermitian";
2104
2105 if(rsb__is_symmetric(mtxAp))
2106 rsb__strcpy(auxbuf,s);
2107 else
2108 if(rsb__is_hermitian(mtxAp))
2109 rsb__strcpy(auxbuf,h);
2110 else
2111 rsb__strcpy(auxbuf,g);
2112 return auxbuf;
2113 }
2114
rsb__fprint_matrix_implementation_code(const struct rsb_mtx_t * mtxAp,const rsb_char_t * op,rsb_flags_t inflags,FILE * fd)2115 rsb_err_t rsb__fprint_matrix_implementation_code(const struct rsb_mtx_t *mtxAp, const rsb_char_t * op, rsb_flags_t inflags, FILE*fd)
2116 {
2117 rsb_err_t errval = RSB_ERR_NO_ERROR;
2118 rsb_char_t buf[RSB_CONST_MATRIX_IMPLEMENTATION_CODE_STRING_MAX_LENGTH];/* Flawfinder: ignore */
2119
2120 fprintf( fd, "%s", rsb__sprint_matrix_implementation_code(mtxAp,op,inflags,buf));
2121 return errval;
2122 }
2123
rsb__cat_compver(rsb_char_t * buf)2124 void rsb__cat_compver(rsb_char_t * buf)
2125 {
2126 /* FIXME : fix rsb_util_sprintf and use it ! */
2127 #if defined(__INTEL_COMPILER)
2128 /* icc 10.10 is ok */
2129 rsb__sprintf(buf,"intel-%d",__INTEL_COMPILER);
2130 #elif defined(__xlC__)
2131 /* ok on sp5 */
2132 rsb__sprintf(buf,"xlc-%d",__xlC__);
2133 #elif defined(__PGI)
2134 /* pgcc-7.0.4 is ok */
2135 rsb__sprintf(buf,"pgcc-%d.%d.%d",__PGIC__,__PGIC_MINOR__,__PGIC_PATCHLEVEL__);
2136 #elif defined(__clang__)
2137 rsb__sprintf(buf,"clang-%d.%d.%d",__clang_major__,__clang_minor__,__clang_patchlevel__);
2138 /* see also __clang_version__ */
2139 #elif defined(__FUJITSU)
2140 rsb__sprintf(buf,"fujitsu-%d.%d.%d"/*"(%s)"*/,__FCC_major__,__FCC_minor__,__FCC_patchlevel__/*,__FCC_version__*/);
2141 #elif defined(__GNUC__)
2142 rsb__sprintf(buf,"gcc-%d.%d",__GNUC__,__GNUC_MINOR__);
2143 #elif defined(__SUNPRO_CC)
2144 rsb__sprintf(buf,"sun-%d",__SUNPRO_CC);
2145 #else /* __SUNPRO_CC */
2146 rsb_util_strcat(buf,"CC?");
2147 #endif /* __SUNPRO_CC */
2148 }
2149
rsb__sprint_matrix_implementation_code(const struct rsb_mtx_t * mtxAp,const rsb_char_t * op,rsb_flags_t inflags,rsb_char_t * buf)2150 const rsb_char_t * rsb__sprint_matrix_implementation_code(const struct rsb_mtx_t *mtxAp, const rsb_char_t * op, rsb_flags_t inflags, rsb_char_t * buf)
2151 {
2152 /*!
2153 * \ingroup gr_internals
2154 *
2155 * Gives back a matrix implementation info string.
2156 * NOTE: for more consistency, we should translate any \t (TAB) char in ' '.
2157 * NOTE: it will give some more info, too..
2158 *
2159 * \return a static string pointer on correct operation, NULL otherwise
2160 * */
2161
2162 rsb_char_t sep[] = "/";
2163 const rsb_char_t * csp;
2164 rsb_long_t sm = 0;
2165 rsb_long_t tsm = 0;
2166 rsb_flags_t flags = RSB_FLAG_NOFLAGS;
2167 rsb_blk_idx_t br = 0,bc = 0;
2168 rsb_char_t auxbuf[RSB_TOKLEN];
2169
2170 if(!mtxAp)
2171 return NULL;
2172
2173 rsb__get_blocking_size(mtxAp,&br,&bc);
2174
2175 flags = mtxAp->flags|inflags;
2176
2177 sm = rsb__submatrices(mtxAp);
2178 tsm = rsb__terminal_recursive_matrix_count(mtxAp);
2179
2180 buf[0] = '\0';
2181
2182 /* FIXME : DANGER */
2183
2184 if(1)
2185 {
2186 long hcoo = rsb__terminal_recursive_matrix_count_with_storage_and_flags(mtxAp,RSB_MATRIX_STORAGE_BCOR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
2187 long hcsr = rsb__terminal_recursive_matrix_count_with_storage_and_flags(mtxAp,RSB_MATRIX_STORAGE_BCSR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
2188 long fcoo = rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(mtxAp,RSB_MATRIX_STORAGE_BCOR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
2189 long fcsr = rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(mtxAp,RSB_MATRIX_STORAGE_BCSR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
2190 long kinds = 0;
2191 if(hcoo)++kinds;
2192 if(hcsr)++kinds;
2193 if(fcoo)++kinds;
2194 if(fcsr)++kinds;
2195 #if 0
2196 if(fcoo==0 && hcoo==0)
2197 rsb_util_strcat(buf,"CSR");
2198 else
2199 if(fcsr==0 && hcsr==0)
2200 rsb_util_strcat(buf,"COO");
2201 else
2202 #endif
2203 rsb_util_strcat(buf,"RSB");
2204 }
2205 else
2206 {
2207 if(rsb__is_recursive_matrix(flags))
2208 rsb_util_strcat(buf,"R");
2209
2210 #ifdef RSB_MATRIX_STORAGE_BCOR
2211 if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_BCOR)
2212 {
2213 if(br==1&&bc==1)
2214 rsb_util_strcat(buf,"COR");
2215 else
2216 rsb_util_strcat(buf,RSB_MATRIX_STORAGE_BCOR_STRING);
2217 }
2218 else
2219 #endif /* RSB_MATRIX_STORAGE_BCOR */
2220 #ifdef RSB_MATRIX_STORAGE_BCOC
2221 if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_BCOC)
2222 {
2223 if(br==1&&bc==1)
2224 rsb_util_strcat(buf,"COC");
2225 else
2226 rsb_util_strcat(buf,RSB_MATRIX_STORAGE_BCOC_STRING);
2227 }
2228 else
2229 #endif /* RSB_MATRIX_STORAGE_BCOC */
2230 #ifdef RSB_MATRIX_STORAGE_BCSR
2231 if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_BCSR)
2232 {
2233 if(br==1&&bc==1)
2234 rsb_util_strcat(buf,"CSR");
2235 else
2236 rsb_util_strcat(buf,RSB_MATRIX_STORAGE_BCSR_STRING);
2237 }
2238 else
2239 #endif /* RSB_MATRIX_STORAGE_BCSR */
2240 #ifdef RSB_MATRIX_STORAGE_BCSC
2241 if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_BCSC)
2242 {
2243 if(br==1&&bc==1)
2244 rsb_util_strcat(buf,"CSC");
2245 else
2246 rsb_util_strcat(buf,RSB_MATRIX_STORAGE_BCSC_STRING);
2247 }
2248 else
2249 #endif /* RSB_MATRIX_STORAGE_BCSC */
2250 #ifdef RSB_MATRIX_STORAGE_VBR
2251 if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_VBR )
2252 rsb_util_strcat(buf,RSB_MATRIX_STORAGE_VBR_STRING);
2253 else
2254 #endif /* RSB_MATRIX_STORAGE_VBR */
2255 #ifdef RSB_MATRIX_STORAGE_VBC
2256 if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_VBC )
2257 rsb_util_strcat(buf,RSB_MATRIX_STORAGE_VBC_STRING);
2258 else
2259 #endif /* RSB_MATRIX_STORAGE_VBC */
2260 #ifdef RSB_MATRIX_STORAGE_LR
2261 if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_LR )
2262 rsb_util_strcat(buf,"LBLR");
2263 else
2264 #endif /* RSB_MATRIX_STORAGE_LR */
2265 #ifdef RSB_MATRIX_STORAGE_LC
2266 if(mtxAp->matrix_storage & RSB_MATRIX_STORAGE_LC )
2267 rsb_util_strcat(buf,"LBLC");
2268 else
2269 #endif /* RSB_MATRIX_STORAGE_LC */
2270 return NULL;
2271 }
2272 {
2273 // if(sm>=1 && rsb__is_recursive_matrix(mtxAp))/* NEW */ /* FIXME : rsb__is_recursive_matrix() seems plagued by indeterminism! */
2274 if(sm>=1 /*&& rsb__is_recursive_matrix(mtxAp)*/)/* NEW */
2275 rsb__sprintf(buf+rsb__util_strlen(buf),"(@:%ld/%ld;%3.1lf%%diagnz;%3.1lf%%diagblk)",sm,tsm,
2276 (((double)rsb__get_diagonal_elements_count(mtxAp)*100)/(mtxAp->nnz)),
2277 (((double)rsb__get_diagonal_submatrices_count(mtxAp)*100)/(tsm))
2278 );
2279 }
2280 #if 1
2281 /* uhm. this refers to inter block ordering. */
2282 if( mtxAp->flags & RSB_FLAG_WANT_COLUMN_MAJOR_ORDER )
2283 rsb_util_strcat(buf,"-C");
2284 else
2285 rsb_util_strcat(buf,"-R");
2286 #endif
2287 rsb_util_strcat(buf,sep);
2288 rsb_util_strcat(buf,"RowMajor");
2289 rsb_util_strcat(buf,sep);
2290 rsb_util_strcat(buf,rsb_do_get_symmetry_string(mtxAp,auxbuf));
2291 rsb_util_strcat(buf,sep);
2292 rsb_util_strcat(buf,op?op:"");
2293 if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_AUTO_BLOCKING))
2294 rsb_util_strcat(buf,"-AutoBlocking");
2295 if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR))
2296 rsb_util_strcat(buf,"-InPlace");
2297 #if 0
2298 if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_USE_HALFWORD_INDICES_COO))
2299 rsb__sprintf(buf+rsb__util_strlen(buf),"-SwitchToHalfwordCoo:(%ld~%ld)"
2300 ,rsb__terminal_recursive_matrix_count_with_flags(mtxAp,RSB_FLAG_USE_HALFWORD_INDICES_COO)
2301 ,tsm);
2302 if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_USE_HALFWORD_INDICES_CSR))
2303 rsb__sprintf(buf+rsb__util_strlen(buf),"-SwitchToHalfwordCsr:(%ld~%ld)"
2304 ,rsb__terminal_recursive_matrix_count_with_flags_but(mtxAp,RSB_FLAG_USE_HALFWORD_INDICES_CSR,RSB_FLAG_USE_HALFWORD_INDICES_COO)
2305 ,tsm);
2306 #else
2307 {
2308 long hcoo = rsb__terminal_recursive_matrix_count_with_storage_and_flags(mtxAp,RSB_MATRIX_STORAGE_BCOR,RSB_FLAG_USE_HALFWORD_INDICES_COO);
2309 long hcsr = rsb__terminal_recursive_matrix_count_with_storage_and_flags(mtxAp,RSB_MATRIX_STORAGE_BCSR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
2310 long fcoo = rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(mtxAp,RSB_MATRIX_STORAGE_BCOR,RSB_FLAG_USE_HALFWORD_INDICES_COO);
2311 long fcsr = rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(mtxAp,RSB_MATRIX_STORAGE_BCSR,RSB_FLAG_USE_HALFWORD_INDICES_CSR);
2312 rsb__sprintf(buf+rsb__util_strlen(buf),"-HalfwordCsr:(%ld~%ld)",hcsr,tsm);
2313 rsb__sprintf(buf+rsb__util_strlen(buf),"-FullwordCsr:(%ld~%ld)",fcsr,tsm);
2314 rsb__sprintf(buf+rsb__util_strlen(buf),"-HalfwordCoo:(%ld~%ld)",hcoo,tsm);
2315 rsb__sprintf(buf+rsb__util_strlen(buf),"-FullwordCoo:(%ld~%ld)",fcoo,tsm);
2316 }
2317 #endif
2318 #if 0
2319 if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_RECURSIVE_HALF_DETECTED_CACHE))
2320 rsb_util_strcat(buf,"-BlockForHalfCache");
2321 if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_RECURSIVE_DOUBLE_DETECTED_CACHE))
2322 rsb_util_strcat(buf,"-BlockForDoubleCache");
2323 #endif
2324 if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_RECURSIVE_SUBDIVIDE_MORE_ON_DIAG))
2325 rsb_util_strcat(buf,"-ExtraDiagonalSubdivisions");
2326 #ifdef RSB_FLAG_EXPERIMENTAL_NO_MICRO_LEAVES
2327 if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXPERIMENTAL_NO_MICRO_LEAVES))
2328 rsb_util_strcat(buf,"-NoMicroLeafs");
2329 #endif /* RSB_FLAG_EXPERIMENTAL_NO_MICRO_LEAVES */
2330
2331 rsb_util_strcat(buf,sep);
2332 RSB_NUMERICAL_TYPE_STRING(csp,mtxAp->typecode);
2333 rsb_util_strcat(buf,csp);
2334
2335 if(1)
2336 {
2337 rsb_thread_t ncores = 0;
2338 #if RSB_WANT_OMP_RECURSIVE_KERNELS
2339 #pragma omp parallel RSB_NTC
2340 if(omp_get_thread_num()==0)
2341 {
2342 ncores = omp_get_num_threads();
2343 }
2344 #endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
2345 ncores = ncores?ncores:1;
2346 rsb_util_strcat(buf,sep);
2347 rsb__sprintf(buf+rsb__util_strlen(buf),"cores:%d",ncores);
2348 }
2349 /* see http://gasnet.cs.berkeley.edu/dist/other/portable_platform.h */
2350 rsb_util_strcat(buf,sep);
2351 /* NOTE : on some systems, __GNUC__ is defined even under icc! (therefore we switched precedence) */
2352
2353 #if 0
2354 /* FIXME : fix rsb_util_sprintf and use it ! */
2355 #if defined(__INTEL_COMPILER)
2356 /* icc 10.10 is ok */
2357 rsb__sprintf(buf+rsb__util_strlen(buf),"intel-%d",__INTEL_COMPILER);
2358 #elif defined(__xlC__)
2359 /* ok on sp5 */
2360 rsb__sprintf(buf+rsb__util_strlen(buf),"xlc-%d",__xlC__);
2361 #elif defined(__PGI)
2362 /* pgcc-7.0.4 is ok */
2363 rsb__sprintf(buf+rsb__util_strlen(buf),"pgcc-%d.%d.%d",__PGIC__,__PGIC_MINOR__,__PGIC_PATCHLEVEL__);
2364 #elif defined(__GNUC__)
2365 rsb__sprintf(buf+rsb__util_strlen(buf),"gcc-%d.%d",__GNUC__,__GNUC_MINOR__);
2366 #elif defined(__SUNPRO_CC)
2367 rsb__sprintf(buf+rsb__util_strlen(buf),"sun-%d",__SUNPRO_CC);
2368 #else /* __SUNPRO_CC */
2369 rsb_util_strcat(buf,"CC?");
2370 #endif /* __SUNPRO_CC */
2371 #else
2372 rsb__cat_compver(buf+rsb__util_strlen(buf));
2373 #endif
2374 rsb_util_strcat(buf,sep);
2375 /* still missing CXX case */
2376 #if defined(CFLAGS)
2377 //rsb_util_sprintf(buf+rsb__util_strlen(buf),"%s",CFLAGS);
2378 rsb__sprintf(buf+rsb__util_strlen(buf),"%s",CFLAGS);
2379 #else /* CFLAGS */
2380 rsb_util_strcat(buf,"");
2381 #endif /* CFLAGS */
2382 /* NEW */
2383 rsb_util_strcat(buf,sep);
2384 rsb__sprintf(buf+rsb__util_strlen(buf),"sizeof(nnz_idx_t):%zd,",sizeof(rsb_nnz_idx_t));
2385 rsb__sprintf(buf+rsb__util_strlen(buf),"sizeof(coo_idx_t):%zd,",sizeof(rsb_coo_idx_t));
2386 rsb__sprintf(buf+rsb__util_strlen(buf),"sizeof(blk_idx_t):%zd",sizeof(rsb_blk_idx_t));
2387
2388 /* NEW */
2389 rsb_util_strcat(buf,sep);
2390 rsb__sprintf(buf+rsb__util_strlen(buf),"idx_storage:%zd-idx_storage_in_csr:%zd-idx_storage_in_coo:%zd"
2391 ,(size_t)rsb__get_index_storage_amount(mtxAp)
2392 ,((size_t)mtxAp->nnz)*sizeof(rsb_coo_idx_t)+((size_t)mtxAp->Mdim+1)*sizeof(rsb_nnz_idx_t)
2393 ,((size_t)mtxAp->nnz)*sizeof(rsb_coo_idx_t)*2
2394 );
2395
2396 rsb_util_strcat(buf,sep);
2397 #ifdef RSB_PACKAGE_VERSION
2398 rsb__sprintf(buf+rsb__util_strlen(buf),"version:%s",RSB_PACKAGE_VERSION);
2399 #endif /* RSB_PACKAGE_VERSION */
2400 rsb_util_strcat(buf,sep);
2401 rsb_util_strcat(buf,"memhinfo:[");
2402 {rsb_char_t usmhib[RSB_MAX_LINE_LENGTH];
2403 rsb_util_strcat(buf,rsb__get_mem_hierarchy_info_string(usmhib));}
2404 rsb_util_strcat(buf,"]");
2405 rsb_util_strcat(buf,sep);
2406 #ifdef RSB_HAVE_SYS_UTSNAME_H
2407 {
2408 struct utsname un;
2409 if(uname(&un)==0)
2410 rsb__sprintf(buf+rsb__util_strlen(buf),"%s",un.nodename);
2411 #if 0
2412 struct utsname {
2413 char sysname[];
2414 char nodename[];
2415 char release[];
2416 char version[];
2417 char machine[];
2418 #ifdef _GNU_SOURCE
2419 char domainname[];
2420 #endif /* _GNU_SOURCE */
2421 };
2422 #endif
2423 }
2424 #else /* RSB_HAVE_SYS_UTSNAME_H */
2425 rsb_util_strcat(buf,"");
2426 #endif /* RSB_HAVE_SYS_UTSNAME_H */
2427
2428 return buf;
2429 }
2430
rsb__util_get_bx_array(const rsb_char_t * optarg,int * bxlp,rsb_blk_idx_t ** bxvp)2431 rsb_err_t rsb__util_get_bx_array(const rsb_char_t* optarg, int* bxlp, rsb_blk_idx_t **bxvp)
2432 {
2433 /*!
2434 \ingroup gr_internals
2435
2436 Will extract block row and block column sizes from user data codified in optarg.
2437 \param bxlp will be set to the number of the desired block sizes.
2438 \param bxvp will be set to an array (of dimension *bxlp) allocated with rsb__malloc() with block sizes.
2439
2440 \note : there are subtle dangers in this function
2441 \note : if *bxvp is not NULL, it will be freed
2442 \todo : move to some file named parse.c
2443 */
2444 int bxl = 0;
2445 rsb_blk_idx_t * bxv = NULL;
2446 const rsb_char_t*c = optarg;
2447 int mint = 1,maxt = 1;
2448
2449 if(!bxlp || !bxvp)
2450 return RSB_ERR_BADARGS;
2451
2452 if(*optarg==':')
2453 {
2454 #if RSB_WANT_OMP_RECURSIVE_KERNELS
2455 maxt = omp_get_max_threads();
2456 #endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
2457 while(mint<=maxt) mint *= 2,++bxl;
2458 if( mint > maxt && mint/2 != maxt ) bxl++;
2459 mint = 1;
2460 }
2461 else
2462 do
2463 {
2464 int nul = 0;
2465 while(*c!=nul && !isdigit(*c))++c;
2466 if(isdigit(*c))bxl++;
2467 while(*c && isdigit(*c))++c;
2468 }while(*c);
2469
2470 bxv = *bxvp;
2471 if(bxv)rsb__free(bxv);
2472 bxv = rsb__malloc(sizeof(rsb_blk_idx_t)*(size_t)bxl);
2473 if(!bxv)goto err;
2474 bxl = 0;
2475 c = optarg;
2476
2477 if(*optarg==':')
2478 {
2479 while( mint <= maxt )
2480 bxv[bxl++] = mint, mint *= 2;
2481 if( bxv[bxl-1] != maxt )
2482 bxv[bxl++] = maxt;
2483 }
2484 else
2485 do
2486 {
2487 int nul = 0,ci;
2488 while(*c!=nul && !isdigit(*c))++c;
2489 {
2490 ci = rsb__util_atoi(c);/* Flawfinder: ignore */
2491 if(ci<1)goto err;
2492 if(isdigit(*c))bxv[bxl++] = (rsb_blk_idx_t)ci;
2493 }
2494 while(*c && isdigit(*c))++c;
2495 }while(*c);
2496
2497 *bxlp = bxl;
2498 *bxvp = bxv;
2499
2500 return RSB_ERR_NO_ERROR;
2501 err:
2502 RSB_CONDITIONAL_FREE(bxv);
2503 rsb__do_perror(NULL,RSB_ERR_GENERIC_ERROR);
2504 return RSB_ERR_GENERIC_ERROR;
2505 }
2506
2507 #if 0
2508 rsb_nnz_idx_t rsb__util_atonnz(const rsb_char_t * optarg)
2509 {
2510 /*!
2511 \ingroup gr_internals
2512
2513 Will parse a single rsb_nnz_idx_t number.
2514 \param optarg
2515
2516 \note : may overflow
2517 \warning : may overflow
2518 */
2519 rsb_long_t i = rsb__util_atol(optarg);/*Flawfinder: ignore */
2520 if(i<0)
2521 i = 0;
2522 return (rsb_nnz_idx_t)i;
2523 }
2524 #endif
2525
rsb__util_atol(const rsb_char_t * nptr)2526 rsb_long_t rsb__util_atol(const rsb_char_t *nptr)
2527 {
2528 /*!
2529 \ingroup gr_internals
2530 */
2531 return atol(nptr);/* Flawfinder: ignore */
2532 }
2533
rsb__util_atof(const rsb_char_t * nptr)2534 rsb_real_t rsb__util_atof(const rsb_char_t *nptr)
2535 {
2536 /*!
2537 \ingroup gr_internals
2538 */
2539 return atof(nptr);/* Flawfinder: ignore */
2540 }
2541
rsb__util_atoi(const rsb_char_t * nptr)2542 int rsb__util_atoi(const rsb_char_t *nptr)
2543 {
2544 /*!
2545 \ingroup gr_internals
2546 */
2547 int n = 0;
2548
2549 if(nptr)
2550 n = atoi(nptr);/* Flawfinder: ignore */
2551
2552 return n;
2553 }
2554
rsb__util_atoi_kmX(const rsb_char_t * nptr,int base)2555 static int rsb__util_atoi_kmX(const rsb_char_t *nptr, int base)
2556 {
2557 /*!
2558 \ingroup gr_internals
2559 */
2560 int v = rsb__util_atoi(nptr);
2561
2562 if(!nptr)
2563 goto ret;
2564 while(isdigit(*nptr))
2565 ++nptr;
2566 if(*nptr && tolower(*nptr)=='g')
2567 v *= base * base * base;
2568 if(*nptr && tolower(*nptr)=='m')
2569 v *= base * base;
2570 if(*nptr && tolower(*nptr)=='k')
2571 v *= base;
2572 ret:
2573 return v;
2574 }
2575
rsb__util_atoi_km2(const rsb_char_t * nptr)2576 int rsb__util_atoi_km2(const rsb_char_t *nptr)
2577 {
2578 return rsb__util_atoi_kmX(nptr, 1024);
2579 }
2580
rsb__util_atoi_km10(const rsb_char_t * nptr)2581 int rsb__util_atoi_km10(const rsb_char_t *nptr)
2582 {
2583 return rsb__util_atoi_kmX(nptr, 1000);
2584 }
2585
rsb__copy_css_arrays(const void * iVA,const rsb_coo_idx_t * iINDX,const rsb_coo_idx_t * iXA,const rsb_nnz_idx_t nnz,rsb_coo_idx_t X,rsb_type_t typecode,void * oVA,rsb_coo_idx_t * oINDX,rsb_nnz_idx_t * oXA)2586 rsb_err_t rsb__copy_css_arrays(const void *iVA, const rsb_coo_idx_t * iINDX, const rsb_coo_idx_t * iXA, const rsb_nnz_idx_t nnz, rsb_coo_idx_t X, rsb_type_t typecode, void *oVA, rsb_coo_idx_t * oINDX, rsb_nnz_idx_t * oXA)
2587 {
2588 if(!iVA || !iINDX || !iXA || RSB_INVALID_COO_INDEX(X) || RSB_INVALID_NNZ_INDEX(nnz) || !oVA || !oINDX || !oXA)
2589 return RSB_ERR_BADARGS;
2590 RSB_CSR_MEMCPY(oVA,oINDX,oXA,iVA,iINDX,iXA,nnz,X,RSB_SIZEOF(typecode));
2591 return RSB_ERR_NO_ERROR;
2592 }
2593
rsb__allocate_csc_arrays_from_coo_sorted(const void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,rsb_coo_idx_t m,rsb_coo_idx_t k,rsb_type_t typecode,void ** VAp,rsb_coo_idx_t ** indxp,rsb_nnz_idx_t ** indptrp)2594 rsb_err_t rsb__allocate_csc_arrays_from_coo_sorted(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_type_t typecode, void **VAp, rsb_coo_idx_t ** indxp, rsb_nnz_idx_t ** indptrp)
2595 {
2596 return rsb__allocate_csr_arrays_from_coo_sorted(VA, JA, IA, nnz, k, m, typecode, VAp, indxp, indptrp);
2597 }
2598
rsb__allocate_csr_arrays_from_coo_sorted(const void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,const rsb_nnz_idx_t nnz,rsb_coo_idx_t m,rsb_coo_idx_t k,rsb_type_t typecode,void ** VAp,rsb_coo_idx_t ** indxp,rsb_nnz_idx_t ** indptrp)2599 rsb_err_t rsb__allocate_csr_arrays_from_coo_sorted(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, const rsb_nnz_idx_t nnz, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_type_t typecode, void **VAp, rsb_coo_idx_t ** indxp, rsb_nnz_idx_t ** indptrp)
2600 {
2601 /*!
2602 \ingroup gr_internals
2603
2604 FIXME : UNFINISHED, UNTESTED, NEW, and SLOW
2605 */
2606 rsb_err_t errval = RSB_ERR_NO_ERROR;
2607 rsb_nnz_idx_t * indx = NULL,MI = 0;
2608 rsb_coo_idx_t * indpntr = NULL;
2609 void *cVA = NULL;
2610
2611 if(!indxp || !indptrp || !JA || !IA) /* VA is optional */
2612 {
2613 errval = RSB_ERR_BADARGS;
2614 goto err;
2615 }
2616
2617 indx = rsb__clone_area(JA,sizeof(rsb_coo_idx_t)*(nnz)); /* FIXME ! BAD ! */
2618 indpntr = rsb__calloc(sizeof(rsb_nnz_idx_t)*(m+1));
2619
2620 if(!indx || !indpntr)
2621 {
2622 errval = RSB_ERR_ENOMEM;
2623 goto err;
2624 }
2625
2626 if(VA)
2627 {
2628 cVA = rsb__clone_area(VA,RSB_SIZEOF(typecode)*(nnz));
2629 if(!cVA)
2630 {
2631 errval = RSB_ERR_ENOMEM;
2632 goto err;
2633 }
2634 }
2635
2636 errval = rsb__do_account_sorted_optimized_css(IA,JA,m,k,nnz,indpntr+1,NULL);
2637
2638 if(RSB_SOME_ERROR(errval))
2639 goto err;
2640
2641 for(MI=0;MI<m;++MI)
2642 indpntr[MI+1] += indpntr[MI];
2643
2644 if(VAp)*VAp = cVA;
2645 *indxp = indx;
2646 *indptrp = indpntr;
2647
2648 goto ok;
2649 err:
2650 RSB_CONDITIONAL_FREE(cVA);
2651 RSB_CONDITIONAL_FREE(indx);
2652 RSB_CONDITIONAL_FREE(indpntr);
2653 ok:
2654 RSB_DO_ERR_RETURN(errval)
2655 }
2656
rsb__print_configuration_string(const char * pn,rsb_char_t * cs,rsb_bool_t wci)2657 rsb_err_t rsb__print_configuration_string(const char *pn, rsb_char_t * cs, rsb_bool_t wci)
2658 {
2659 /*!
2660 \ingroup gr_internals
2661 */
2662 /*
2663 note : an interleaved
2664 RSB_INFO(
2665 #ifdef FOO
2666 "string"
2667 # endif
2668 )
2669 style of string composition breaks xlc, so we don't use it, in the following.
2670 FIXME: pn should be reasonably short, as this routine does NOT check for buffer overflow. for this same reason this function shall remain INTERNAL always.
2671 */
2672 rsb_char_t buf[RSB_MAX_VERSION_STRING_LENGTH];
2673 const rsb_char_t * sep = " ";
2674 const rsb_char_t * nl = "\n";
2675
2676 rsb__strcpy(buf,"");
2677 if(!cs)
2678 {
2679 // TODO: missing error handling
2680 goto done;
2681 }
2682 #if 0
2683 rsb__sprintf(buf,"%s version: %d.%d.%d\n",pn?pn:"librsb",RSB_LIBRSB_VER_MAJOR,RSB_LIBRSB_VER_MINOR,RSB_LIBRSB_VER_RELEASE);
2684 #else
2685 rsb__sprintf(buf,"%s version: %s\n",pn?pn:"librsb",RSB_LIBRSB_VER_STRING);
2686 #endif
2687 if(wci == RSB_BOOL_FALSE)
2688 {
2689 rsb__sprintf(cs,"%s",buf);
2690 rsb__sprintf(cs+strlen(cs),"%s.\n\n",RSB_COPYRIGHT_STRING);
2691 rsb__sprintf(cs+strlen(cs),"Written by %s.\n",RSB_PACKAGE_BUGREPORT);
2692 goto done;
2693 }
2694 rsb_util_strcat(buf,"format switches:");
2695 #ifdef RSB_MATRIX_STORAGE_BCSR_STRING
2696 rsb_util_strcat(buf,"br");
2697 rsb_util_strcat(buf,sep);
2698 #endif /* RSB_MATRIX_STORAGE_BCSR_STRING */
2699 #ifdef RSB_MATRIX_STORAGE_BCSC_STRING
2700 rsb_util_strcat(buf,"bc");
2701 rsb_util_strcat(buf,sep);
2702 #endif /* RSB_MATRIX_STORAGE_BCSC_STRING */
2703 #ifdef RSB_MATRIX_STORAGE_VBR_STRING
2704 rsb_util_strcat(buf,"vr");
2705 rsb_util_strcat(buf,sep);
2706 #endif /* RSB_MATRIX_STORAGE_VBR_STRING */
2707 #ifdef RSB_MATRIX_STORAGE_VBC_STRING
2708 rsb_util_strcat(buf,"vc");
2709 rsb_util_strcat(buf,sep);
2710 #endif /* RSB_MATRIX_STORAGE_VBC_STRING */
2711 #ifdef RSB_MATRIX_STORAGE_LC_STRING
2712 rsb_util_strcat(buf,"lc");
2713 rsb_util_strcat(buf,sep);
2714 #endif /* RSB_MATRIX_STORAGE_LC_STRING */
2715 #ifdef RSB_MATRIX_STORAGE_LR_STRING
2716 rsb_util_strcat(buf,"lr");
2717 rsb_util_strcat(buf,sep);
2718 #endif /* RSB_MATRIX_STORAGE_LR_STRING */
2719 rsb_util_strcat(buf,nl);
2720 rsb_util_strcat(buf,"ops:");
2721 rsb_util_strcat(buf,RSB_M4_MATRIX_META_OPS_STRING);
2722 rsb_util_strcat(buf,nl);
2723
2724 rsb_util_strcat(buf,"types:");
2725 rsb_util_strcat(buf,RSB_M4_MATRIX_TYPES_STRING);
2726 rsb_util_strcat(buf,nl);
2727 rsb_util_strcat(buf,"type char codes:");
2728 rsb_util_strcat(buf,RSB_NUMERICAL_TYPE_PREPROCESSOR_SYMBOLS );
2729 rsb_util_strcat(buf,nl);
2730 rsb_util_strcat(buf,"transposition codes:");
2731 rsb_util_strcat(buf,RSB_TRANSPOSITIONS_PREPROCESSOR_SYMBOLS );
2732 rsb_util_strcat(buf,nl);
2733
2734 rsb_util_strcat(buf,"restrict keyword is: ");
2735 #ifdef RSB_restrict
2736 rsb_util_strcat(buf,"on" );
2737 #else /* RSB_restrict */
2738 rsb_util_strcat(buf,"off" );
2739 #endif /* RSB_restrict */
2740 rsb_util_strcat(buf,nl);
2741
2742 rsb_util_strcat(buf,"row unrolls:");
2743 rsb_util_strcat(buf,RSB_M4_WANT_COLUMN_UNLOOP_FACTORS_STRING);
2744 rsb_util_strcat(buf,nl);
2745 rsb_util_strcat(buf,"column unrolls:");
2746 rsb_util_strcat(buf,RSB_M4_WANT_ROW_UNLOOP_FACTORS_STRING );
2747 rsb_util_strcat(buf,nl);
2748 rsb_util_strcat(buf,"reference benchmark sample minimum time (seconds):%lg\n");
2749 rsb_util_strcat(buf,"reference benchmark sample minimum runs:%zd\n");
2750 rsb_util_strcat(buf,"maximal configured block size:%zd\n");
2751 #ifdef RSB_WANT_OSKI_BENCHMARKING
2752 rsb_util_strcat(buf,"oski comparative benchmarking enabled\n");
2753 #endif /* RSB_WANT_OSKI_BENCHMARKING */
2754 rsb_util_strcat(buf,"sizeof(rsb_nnz_idx_t):%zd\n");
2755 rsb_util_strcat(buf,"sizeof(rsb_coo_idx_t):%zd\n");
2756 rsb_util_strcat(buf,"sizeof(rsb_blk_idx_t):%zd\n");
2757 rsb_util_strcat(buf,"sizeof(size_t):%zd\n");
2758 rsb_util_strcat(buf,"sizeof(struct rsb_mtx_t):%zd\n");
2759 rsb_util_strcat(buf,"sizeof(struct rsb_blas_sparse_matrix_t):%zd\n");
2760 rsb_util_strcat(buf,"sizeof(struct rsb_coo_matrix_t):%zd\n");
2761 rsb_util_strcat(buf,"RSB_MAX_MATRIX_DIM:%zd\n");
2762 rsb_util_strcat(buf,"RSB_MAX_MATRIX_NNZ:%zd\n");
2763 rsb_util_strcat(buf,"RSB_CONST_MAX_SUPPORTED_CORES:%zd\n");
2764 rsb_util_strcat(buf,"RSB_BLAS_MATRICES_MAX:%zd\n");
2765 rsb_util_strcat(buf,"RSB_CONST_MIN_NNZ_PER_ROW_FOR_COO_SWITCH:%zd\n");
2766
2767 rsb_util_strcat(buf,"RSB_USER_SET_MEM_HIERARCHY_INFO:%s\n");
2768 rsb_util_strcat(buf,"RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t):%zd\n");
2769 rsb_util_strcat(buf,"RSB_IOLEVEL:%d\n");
2770 //RSB_INFO(
2771 rsb__sprintf(cs,
2772 buf,
2773 RSB_BENCHMARK_MIN_SECONDS,
2774 (rsb_printf_int_t)RSB_BENCHMARK_MIN_RUNS,
2775 (rsb_printf_int_t)RSB_MAXIMAL_CONFIGURED_BLOCK_SIZE,
2776 sizeof(rsb_nnz_idx_t),
2777 sizeof(rsb_coo_idx_t),
2778 sizeof(rsb_blk_idx_t),
2779 sizeof(size_t),
2780 sizeof(struct rsb_mtx_t),
2781 sizeof(struct rsb_blas_sparse_matrix_t),
2782 sizeof(struct rsb_coo_matrix_t),
2783 (rsb_printf_int_t)RSB_MAX_MATRIX_DIM,
2784 (rsb_printf_int_t)RSB_MAX_MATRIX_NNZ,
2785 (rsb_printf_int_t)RSB_CONST_MAX_SUPPORTED_CORES,
2786 (rsb_printf_int_t)RSB_BLAS_MATRICES_MAX,
2787 (rsb_printf_int_t)RSB_CONST_MIN_NNZ_PER_ROW_FOR_COO_SWITCH
2788 ,(rsb_printf_int_t)rsb__init_get_mem_hierarchy_info_string(RSB_BOOL_FALSE)?rsb__init_get_mem_hierarchy_info_string(RSB_BOOL_FALSE):NULL
2789 ,RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t)
2790 ,RSB_IOLEVEL
2791 );
2792 done:
2793 return RSB_ERR_NO_ERROR;
2794 }
2795
rsb_blk_idx_t rsb__recursive_middle_block_index(rsb_blk_idx_t i)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the index at which a block range of extent i shall be split.
	 */
#if RSB_EXPERIMENTAL_MORTON_ORDERED_RECURSION
	/* Smallest s with 2^(s+1) >= i; the split point is then 2^s.
	 * (The parentheses make the original `1<<s+1` precedence explicit.) */
	rsb_blk_idx_t s;

	for(s = 0; (1<<(s+1)) < i; ++s)
		;
	return (1<<s);
#else /* RSB_EXPERIMENTAL_MORTON_ORDERED_RECURSION */
	/* Plain halving, rounding upwards. */
	return (i+1)/2;
#endif /* RSB_EXPERIMENTAL_MORTON_ORDERED_RECURSION */
}
2820
rsb_err_t rsb__recursive_middle_index(const struct rsb_mtx_partitioning_info_t * pinfop, rsb_coo_idx_t * M_bp, rsb_coo_idx_t * K_bp )
{
	/*!
	 * \ingroup gr_internals
	 *
	 * Computes the (row, column) coordinate pair at which a matrix described
	 * by \c pinfop would be split by the recursive partitioner.
	 *
	 * \param pinfop partitioning info (block counts, dimensions, optional rpntr/cpntr arrays)
	 * \param M_bp   output: row split coordinate
	 * \param K_bp   output: column split coordinate
	 * \return RSB_ERR_NO_ERROR on success, RSB_ERR_BADARGS on any NULL argument.
	 */
	rsb_blk_idx_t mBi;
	rsb_blk_idx_t kBi;
	if(!pinfop || !M_bp || !K_bp)
		return RSB_ERR_BADARGS;
	mBi = rsb__recursive_middle_block_index(pinfop->M_b);
	kBi = rsb__recursive_middle_block_index(pinfop->K_b);
	/* NOTE(review): when rpntr/cpntr are present the middle-index function is
	 * applied twice (once to M_b/K_b above, once more to mBi/kBi below),
	 * yielding roughly the quarter point — confirm this is intended and not
	 * meant to be pinfop->rpntr[mBi]. */
	if(pinfop->rpntr)
		*M_bp = pinfop->rpntr[rsb__recursive_middle_block_index(mBi)];
	else
		*M_bp = rsb__recursive_middle_block_index(pinfop->nr);
	if(pinfop->cpntr)
		*K_bp = pinfop->cpntr[rsb__recursive_middle_block_index(kBi)];
	else
		*K_bp = rsb__recursive_middle_block_index(pinfop->nc);
	return RSB_ERR_NO_ERROR;
}
2844
rsb__recursive_split_point_parms_get(const struct rsb_mtx_partitioning_info_t * pinfop,rsb_coo_idx_t * moff,rsb_coo_idx_t * koff)2845 rsb_err_t rsb__recursive_split_point_parms_get(
2846 const struct rsb_mtx_partitioning_info_t * pinfop,
2847 rsb_coo_idx_t * moff, rsb_coo_idx_t * koff)
2848 {
2849 /*!
2850 \ingroup gr_internals
2851
2852 FIXME: this is function is OBSOLETE
2853 */
2854 rsb_err_t errval = RSB_ERR_NO_ERROR;
2855
2856 if(!pinfop || !pinfop->rpntr || !pinfop->cpntr)
2857 {
2858 errval = RSB_ERR_BADARGS;
2859 }
2860 RSB_DEBUG_ASSERT(moff);
2861 RSB_DEBUG_ASSERT(koff);
2862 RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(pinfop->M_b));
2863 RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(pinfop->K_b));
2864
2865 *moff = pinfop->rpntr[ rsb__recursive_middle_block_index(pinfop->M_b) ];
2866 *koff = pinfop->cpntr[ rsb__recursive_middle_block_index(pinfop->K_b) ];
2867
2868 RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(*moff));
2869 RSB_DEBUG_ASSERT(RSB_IS_VALID_COO_INDEX(*koff));
2870
2871 RSB_DO_ERR_RETURN(errval)
2872 }
2873
rsb__terminal_recursive_matrix_count(const struct rsb_mtx_t * mtxAp)2874 rsb_long_t rsb__terminal_recursive_matrix_count(const struct rsb_mtx_t *mtxAp)
2875 {
2876 /*!
2877 * \ingroup gr_internals
2878 * \return the count of leaf (terminal) matrices
2879 *
2880 * TODO : change this function type!
2881 */
2882 rsb_submatrix_idx_t i,j;
2883 struct rsb_mtx_t * submatrix = NULL;
2884 rsb_long_t smc = 0;
2885
2886 if(!mtxAp)
2887 {smc = 0;goto done;}
2888
2889 if(rsb__is_terminal_recursive_matrix(mtxAp))
2890 {smc = 1;goto done;}
2891
2892 RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
2893 if(submatrix)
2894 smc += rsb__terminal_recursive_matrix_count(submatrix);
2895 done:
2896 return smc;
2897 }
2898
rsb__do_compute_terminal_nnz_min_max_avg_count(const struct rsb_mtx_t * mtxAp,rsb_nnz_idx_t * minnz,rsb_nnz_idx_t * maxnz,rsb_nnz_idx_t * avgnz)2899 rsb_err_t rsb__do_compute_terminal_nnz_min_max_avg_count(const struct rsb_mtx_t *mtxAp, rsb_nnz_idx_t * minnz, rsb_nnz_idx_t * maxnz, rsb_nnz_idx_t * avgnz)
2900 {
2901 // struct rsb_mtx_t * submatrix = NULL;
2902 rsb_err_t errval = RSB_ERR_NO_ERROR;
2903 if(minnz)
2904 *minnz = RSB_MAX_MATRIX_NNZ;
2905 if(maxnz)
2906 *maxnz = 0;
2907 if(avgnz)
2908 *avgnz = 0;
2909 errval = rsb__do_compute_terminal_nnz_min_max_count(mtxAp,minnz,maxnz);
2910 if(avgnz)
2911 *avgnz = mtxAp->nnz/rsb__terminal_recursive_matrix_count(mtxAp);
2912 RSB_DO_ERR_RETURN(errval)
2913 }
2914
rsb__do_compute_terminal_nnz_min_max_count(const struct rsb_mtx_t * mtxAp,rsb_nnz_idx_t * minnz,rsb_nnz_idx_t * maxnz)2915 rsb_err_t rsb__do_compute_terminal_nnz_min_max_count(const struct rsb_mtx_t *mtxAp, rsb_nnz_idx_t * minnz, rsb_nnz_idx_t * maxnz)
2916 {
2917 /*!
2918 * \ingroup gr_internals
2919 * \return ...
2920 */
2921 rsb_submatrix_idx_t i,j;
2922 struct rsb_mtx_t * submatrix = NULL;
2923 rsb_err_t errval = RSB_ERR_NO_ERROR;
2924
2925 if(!mtxAp)
2926 {
2927 RSB_ERROR(RSB_ERRM_ES);
2928 errval = RSB_ERR_BADARGS;
2929 }
2930
2931 if(rsb__is_terminal_recursive_matrix(mtxAp))
2932 {
2933 if(minnz)
2934 // RSB_STDOUT_MATRIX_SUMMARY(mtxAp), RSB_INFO(" <- MIN (from %d)\n",*minnz),
2935 *minnz = RSB_MIN(*minnz,mtxAp->nnz);
2936 if(maxnz)
2937 // RSB_STDOUT_MATRIX_SUMMARY(mtxAp), RSB_INFO(" <- MAX (from %d)\n",*maxnz),
2938 *maxnz = RSB_MAX(*maxnz,mtxAp->nnz);
2939 }
2940 else
2941 {
2942 RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
2943 if(submatrix)
2944 RSB_DO_ERROR_CUMULATE(errval,rsb__do_compute_terminal_nnz_min_max_count(submatrix,minnz,maxnz));
2945 }
2946 //done:
2947 RSB_DO_ERR_RETURN(errval)
2948 }
2949
rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(const struct rsb_mtx_t * mtxAp,rsb_fmt_t matrix_storage,rsb_flags_t flags)2950 rsb_long_t rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(const struct rsb_mtx_t *mtxAp, rsb_fmt_t matrix_storage, rsb_flags_t flags)
2951 {
2952 /*!
2953 * \ingroup gr_internals
2954 * \return the count of leaf (terminal) matrices
2955 *
2956 * TODO : change this function type!
2957 */
2958 rsb_submatrix_idx_t i,j;
2959 struct rsb_mtx_t * submatrix = NULL;
2960 rsb_long_t smc = 0;
2961
2962 if(!mtxAp)
2963 {smc=0;goto done;}
2964
2965 if(rsb__is_terminal_recursive_matrix(mtxAp))
2966 {
2967 if((!RSB_DO_FLAG_HAS(mtxAp->flags,flags)) && (mtxAp->matrix_storage==matrix_storage))
2968 smc = 1;
2969 goto done;
2970 }
2971
2972 RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
2973 if(submatrix)
2974 smc += rsb__terminal_recursive_matrix_count_with_storage_and_no_flags(submatrix,matrix_storage,flags);
2975 done:
2976 return smc;
2977 }
2978
rsb__terminal_recursive_matrix_count_with_storage_and_flags(const struct rsb_mtx_t * mtxAp,rsb_fmt_t matrix_storage,rsb_flags_t flags)2979 rsb_long_t rsb__terminal_recursive_matrix_count_with_storage_and_flags(const struct rsb_mtx_t *mtxAp, rsb_fmt_t matrix_storage, rsb_flags_t flags)
2980 {
2981 /*!
2982 * \ingroup gr_internals
2983 * \return the count of leaf (terminal) matrices
2984 *
2985 * TODO : change this function type!
2986 */
2987 rsb_submatrix_idx_t i,j;
2988 struct rsb_mtx_t * submatrix = NULL;
2989 rsb_long_t smc = 0;
2990
2991 if(!mtxAp)
2992 {smc = 0;goto done;}
2993
2994 if(rsb__is_terminal_recursive_matrix(mtxAp))
2995 {
2996 if(RSB_DO_FLAG_HAS(mtxAp->flags,flags) && (mtxAp->matrix_storage==matrix_storage))
2997 smc = 1;
2998 goto done;
2999 }
3000
3001 RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
3002 if(submatrix)
3003 smc += rsb__terminal_recursive_matrix_count_with_storage_and_flags(submatrix,matrix_storage,flags);
3004 done:
3005 return smc;
3006 }
3007
rsb__terminal_recursive_matrix_count_with_flags_but(const struct rsb_mtx_t * mtxAp,rsb_flags_t flags,rsb_flags_t nflags)3008 rsb_long_t rsb__terminal_recursive_matrix_count_with_flags_but(const struct rsb_mtx_t *mtxAp, rsb_flags_t flags, rsb_flags_t nflags)
3009 {
3010 /*!
3011 * \ingroup gr_internals
3012 * \return the count of leaf (terminal) matrices
3013 *
3014 * TODO : change this function type!
3015 */
3016 rsb_submatrix_idx_t i,j;
3017 struct rsb_mtx_t * submatrix = NULL;
3018 rsb_long_t smc = 0;
3019
3020 if(!mtxAp)
3021 {smc = 0;goto done;}
3022
3023 if(rsb__is_terminal_recursive_matrix(mtxAp))
3024 {
3025 if(RSB_DO_FLAG_HAS(mtxAp->flags,flags) && !RSB_DO_FLAG_HAS(mtxAp->flags,nflags))
3026 smc = 1;
3027 goto done;
3028 }
3029
3030 RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
3031 if(submatrix)
3032 smc += rsb__terminal_recursive_matrix_count_with_flags_but(submatrix,flags,nflags);
3033 done:
3034 return smc;
3035 }
3036
rsb__terminal_recursive_matrix_count_with_flags(const struct rsb_mtx_t * mtxAp,rsb_flags_t flags)3037 rsb_long_t rsb__terminal_recursive_matrix_count_with_flags(const struct rsb_mtx_t *mtxAp, rsb_flags_t flags)
3038 {
3039 /*!
3040 * \ingroup gr_internals
3041 * \return the count of leaf (terminal) matrices
3042 *
3043 * TODO : change this function type!
3044 */
3045 rsb_submatrix_idx_t i,j;
3046 struct rsb_mtx_t * submatrix = NULL;
3047 rsb_long_t smc = 0;
3048
3049 if(!mtxAp)
3050 {smc = 0;goto done;}
3051
3052 if(rsb__is_terminal_recursive_matrix(mtxAp))
3053 {
3054 if(RSB_DO_FLAG_HAS(mtxAp->flags,flags))
3055 smc = 1;
3056 goto done;
3057 }
3058
3059 RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
3060 if(submatrix)
3061 smc += rsb__terminal_recursive_matrix_count_with_flags(submatrix,flags);
3062 done:
3063 return smc;
3064 }
3065
rsb__do_transposition_from_char(rsb_char_t tc)3066 rsb_trans_t rsb__do_transposition_from_char(rsb_char_t tc)
3067 {
3068 /*!
3069 * \ingroup gr_internals
3070 * FIXME : document this
3071 */
3072 if(tolower(tc)=='t')
3073 return RSB_TRANSPOSITION_T;
3074 else
3075 if(tolower(tc)=='n')
3076 return RSB_TRANSPOSITION_N;
3077 else
3078 if(tolower(tc)=='c')
3079 return RSB_TRANSPOSITION_C;
3080 else
3081 return RSB_INVALID_TRANS;
3082 }
3083
rsb_trans_t rsb__do_transpose_transposition(rsb_trans_t transA)
{
	/*!
	 * \ingroup gr_internals
	 * \return the transposition obtained by transposing op(A) once more:
	 * a non-transposing code becomes T, while both T and C fall back to N;
	 * any other value passes through unchanged.
	 */
	rsb_trans_t transR = transA;

	if( RSB_DOES_NOT_TRANSPOSE(transA) )
		transR = RSB_TRANSPOSITION_T;
	else
	if( transA == RSB_TRANSPOSITION_T || transA == RSB_TRANSPOSITION_C )
		transR = RSB_TRANSPOSITION_N;
	return transR;
}
3098
rsb_spmm_inner(const struct rsb_mtx_t * mtxAp,const void * mrhs,void * mout,rsb_int_t bstride,rsb_int_t cstride,rsb_int_t nrhs,rsb_trans_t transA)3099 rsb_err_t rsb_spmm_inner(const struct rsb_mtx_t * mtxAp, const void * mrhs, void *mout, rsb_int_t bstride, rsb_int_t cstride, rsb_int_t nrhs, rsb_trans_t transA)
3100 {
3101 #ifdef RSB_HAVE_OPTYPE_SPMM_AZ
3102 /*!
3103 * \ingroup gr_internals
3104 * fixme */
3105
3106 size_t el_size = mtxAp->el_size;
3107
3108 if( rsb__is_recursive_matrix(mtxAp->flags))
3109 {
3110 rsb_submatrix_idx_t i,j;
3111 struct rsb_mtx_t * submatrix = NULL;
3112 rsb_coo_idx_t mB = (mtxAp->rpntr[rsb__recursive_middle_block_index(mtxAp->M_b)]);
3113 rsb_coo_idx_t kB = (mtxAp->cpntr[rsb__recursive_middle_block_index(mtxAp->K_b)]);
3114
3115 RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
3116 if(submatrix)
3117 {
3118 rsb_coo_idx_t moff,koff;
3119
3120 moff = i*mB;
3121 koff = j*kB;
3122
3123 rsb_spmm_inner(submatrix,((rsb_byte_t*)mrhs)+koff*el_size,((rsb_byte_t*)mout)+moff*el_size,bstride,cstride,nrhs,transA);
3124 }
3125 return RSB_ERR_NO_ERROR;
3126 }
3127 else
3128 return rsb__do_spmm_az(mtxAp,mrhs,mout,bstride,cstride,nrhs,transA);/*FIXME*/
3129 #else /* RSB_HAVE_OPTYPE_SPMM_AZ */
3130 return RSB_ERR_UNSUPPORTED_OPERATION;
3131 #endif /* RSB_HAVE_OPTYPE_SPMM_AZ */
3132 }
3133
rsb_flags_t rsb__do_flip_uplo_flags(rsb_flags_t flags)
{
	/**
	 * \ingroup gr_internals
	 * Swaps the UPPER triangle flag for LOWER (or vice versa), leaving all
	 * other flag bits untouched; flags with neither bit are returned as-is.
	 */
	const rsb_bool_t had_upper = RSB_DO_FLAG_HAS(flags,RSB_FLAG_UPPER);

	if( had_upper )
		RSB_DO_FLAG_SUBST(flags,RSB_FLAG_UPPER,RSB_FLAG_LOWER);
	else
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_LOWER) )
		RSB_DO_FLAG_SUBST(flags,RSB_FLAG_LOWER,RSB_FLAG_UPPER);
	return flags;
}
3146
rsb_flags_t rsb__do_detect_and_add_triangular_flags(rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_flags_t flags)
{
	/**
	 * \ingroup gr_internals
	 * Scans the coordinate arrays and adds RSB_FLAG_LOWER / RSB_FLAG_UPPER
	 * according to the off-diagonal entries found; when only one of the two
	 * triangles is populated (or none), RSB_FLAG_TRIANGULAR is added too.
	 *
	 * \param IA,JA coordinate arrays of nnz entries
	 * \param nnz   number of entries
	 * \param flags input flags to be augmented
	 * \return the updated flags (the input flags on NULL arrays / invalid nnz).
	 */
	rsb_nnz_idx_t i;
	if(!IA || !JA || RSB_INVALID_NNZ_INDEX(nnz))
		return flags;
	for(i=0;i<nnz;++i)
	{
		if(IA[i]==JA[i])
			continue;
		if(IA[i]>JA[i])
			RSB_DO_FLAG_ADD(flags,RSB_FLAG_LOWER);
		else
			RSB_DO_FLAG_ADD(flags,RSB_FLAG_UPPER);
		/* Optimization (was a FIXME): once both triangles have been seen the
		 * outcome cannot change anymore — stop scanning. */
		if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_LOWER) && RSB_DO_FLAG_HAS(flags,RSB_FLAG_UPPER))
			break;
	}
	if(RSB_NAND(RSB_DO_FLAG_HAS(flags,RSB_FLAG_UPPER),RSB_DO_FLAG_HAS(flags,RSB_FLAG_LOWER)))
		RSB_DO_FLAG_ADD(flags,RSB_FLAG_TRIANGULAR);

	/* it could be the case that both RSB_FLAG_LOWER and RSB_FLAG_UPPER flags get caught */
	return flags;
}
3171
rsb_err_t rsb__do_load_matrix_file_as_matrix_market(struct rsb_mtx_t ** mtxApp, const rsb_char_t * filename, rsb_flags_t flags, rsb_type_t typecode)
{
	/*!
	 * Loads a Matrix Market file into a newly allocated RSB matrix.
	 *
	 * \param mtxApp   output: *mtxApp receives the new matrix on success (caller frees)
	 * \param filename path of the Matrix Market file; vector files are rejected
	 * \param flags    assembly flags; symmetry/triangle flags from the file header are added
	 * \param typecode numerical type for the assembled matrix
	 * \return RSB_ERR_NO_ERROR on success, an error code otherwise.
	 *
	 * FIXME: and typecode check ?
	 * FIXME: UNFINISHED
	 */
	/** \ingroup gr_unfinished */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	struct rsb_mtx_t * mtxAp = NULL;
	void *VA = NULL;
	rsb_coo_idx_t *IA = NULL, *JA = NULL;

	if(!mtxApp)
	{
		RSB_ERROR(RSB_ERRM_E_MTXAP);
		errval = RSB_ERR_BADARGS;
	}
	else
	if(!filename)
	{
		RSB_ERROR(RSB_ERRM_NPSFF);
		errval = RSB_ERR_BADARGS;
	}
	else
	{
		rsb_coo_idx_t m = 0,k = 0;
		rsb_nnz_idx_t nnz = 0;
#define RSB_20120309_FIX 1
#if RSB_20120309_FIX
		rsb_bool_t is_symmetric = RSB_BOOL_FALSE;
		rsb_bool_t is_hermitian = RSB_BOOL_FALSE;
		rsb_bool_t is_lower = RSB_BOOL_FALSE;
		rsb_bool_t is_upper = RSB_BOOL_FALSE;
		rsb_bool_t is_vector = RSB_BOOL_FALSE;
		/* FIXME: shall update test_matops.c accordingly. */
		/* FIXME: need limits checks! */
		/* First pass: read only the header (dimensions + structural properties). */
		if(RSB_SOME_ERROR(rsb__util_mm_info_matrix_f(filename,&m,&k,&nnz,NULL,&is_symmetric,&is_hermitian,NULL,&is_lower,&is_upper,&is_vector) ) || is_vector )
		{
			errval = RSB_ERR_BADARGS;
			RSB_PERR_GOTO(err,RSB_ERRMSG_NOTMTXMKT" : %s ..\n",filename);
		}
		else
		{
			/* Fold the header's structural declarations into the assembly flags. */
			if( is_symmetric == RSB_BOOL_TRUE ) RSB_DO_FLAG_ADD(flags,RSB_FLAG_SYMMETRIC);
			if( is_hermitian == RSB_BOOL_TRUE ) RSB_DO_FLAG_ADD(flags,RSB_FLAG_HERMITIAN);
			if(is_upper) RSB_DO_FLAG_ADD(flags,RSB_FLAG_UPPER);
			if(is_lower) RSB_DO_FLAG_ADD(flags,RSB_FLAG_LOWER);
		}

		/* Square matrices: reserve extra room for one entry per diagonal element. */
		if( m==k && m>1 /* && want_only_lowtri*/ )
			nnz += m;	/* the loading routine shall allocate nnz+m */
		else
			nnz = 0;	/* the loading routine should determine nnz */
#endif /* RSB_20120309_FIX */
		/* Second pass: load the full COO triplet arrays. */
		if((errval = rsb__util_mm_load_matrix_f(filename,&IA,&JA,&VA,&m,&k,&nnz,typecode,flags,NULL,NULL))!=RSB_ERR_NO_ERROR)
		{
			RSB_ERROR(RSB_ERRM_ES);
			rsb__do_perror(NULL,errval);
			goto err;
		}
		/* Assemble in place over VA/IA/JA (ownership passes to the matrix). */
		if((mtxAp = rsb__do_mtx_alloc_from_coo_inplace(VA,IA,JA,nnz,typecode,m,k,RSB_DEFAULT_ROW_BLOCKING,RSB_DEFAULT_COL_BLOCKING,flags,&errval))==NULL)
		{
			RSB_PERR_GOTO(err,RSB_ERRM_ES);
			// FIXME: incomplete error handling
		}
		if(mtxAp)
		{
			rsb_submatrix_idx_t smi;
			struct rsb_mtx_t * submatrix;

			/* Clear the "externally allocated" mark so rsb_mtx_free() will
			 * also release the in-place COO arrays. */
			RSB_SUBMATRIX_FOREACH_LEAF(mtxAp,submatrix,smi)
			RSB_DO_FLAG_DEL(submatrix->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS);
			RSB_DO_FLAG_DEL(mtxAp->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS);
		}
		*mtxApp = mtxAp;
		goto ok;
	}
err:
	/* Failure path: release whatever COO arrays were allocated. */
	RSB_CONDITIONAL_FREE(IA);
	RSB_CONDITIONAL_FREE(JA);
	RSB_CONDITIONAL_FREE(VA);
ok:
	RSB_DO_ERR_RETURN(errval)
}
3256
rsb_bool_t rsb__are_coo_matrices_equal(const struct rsb_coo_matrix_t *cm1, const struct rsb_coo_matrix_t *cm2)
{
	/*!
	 * Deep comparison of two COO matrices: dimensions, nnz, typecode and —
	 * when both sides provide them — the IA/JA/VA arrays. Values are compared
	 * via rsb__do_are_same rather than memcmp, so e.g. -0.0 matches 0.0.
	 * A NULL array on either side is simply skipped, not treated as differing.
	 * \return RSB_BOOL_TRUE when equal, RSB_BOOL_FALSE otherwise.
	 */
	/* this is a debug routine and may print internal stuff */
	const rsb_bool_t no = RSB_BOOL_FALSE;
	const rsb_bool_t yes = RSB_BOOL_TRUE;
	rsb_nnz_idx_t nnz;
#if (RSB_WANT_VERBOSE_MESSAGES || 1)
#define RSB_GOTO_DIFFERING { RSB_PERR_GOTO(differing,RSB_ERRM_ES);}
#else /* RSB_WANT_VERBOSE_MESSAGES */
#define RSB_GOTO_DIFFERING {goto differing;}
#endif /* RSB_WANT_VERBOSE_MESSAGES */
	/* Identity (including both NULL) counts as equality. */
	if( cm1 == cm2)
		goto equal;
	if(!cm1 || !cm2)
		RSB_GOTO_DIFFERING
	nnz = cm1->nnz;
	if( RSB_INVALID_NNZ_INDEX(nnz) )
		RSB_GOTO_DIFFERING
	/* Cheap scalar fields first. */
	if(cm1->nr!= cm2->nr)
		RSB_GOTO_DIFFERING
	if(cm1->nc!= cm2->nc)
		RSB_GOTO_DIFFERING
	if(cm1->nnz!= cm2->nnz)
		RSB_GOTO_DIFFERING
	if(cm1->typecode!= cm2->typecode)
		RSB_GOTO_DIFFERING
	//if((!cm1->IA)&&(!cm2->IA)) return no;
	//if((!cm1->IA)||(!cm2->IA)) return no;
	//else
	if((cm1->IA)&&(cm2->IA))
	if(RSB_MEMCMP(cm1->IA,cm2->IA,sizeof(rsb_coo_idx_t)*nnz))
		RSB_GOTO_DIFFERING
	//if((!cm1->JA)&&(!cm2->JA)) return no;
	//if((!cm1->JA)||(!cm2->JA)) return no;
	//else
	if((cm1->JA)&&(cm2->JA))
	if(RSB_MEMCMP(cm1->JA,cm2->JA,sizeof(rsb_coo_idx_t)*nnz))
		RSB_GOTO_DIFFERING
	//if((!cm1->VA)&&(!cm2->VA)) return no;
	//if((!cm1->VA)||(!cm2->VA)) return no;
	//else
	if((cm1->VA)&&(cm2->VA))
	{
#if 1
	/* Numerical comparison: tolerates differing zero signs. */
	if(rsb__do_are_same(cm1->VA,cm2->VA, nnz,cm1->typecode, 1, 1))
		RSB_GOTO_DIFFERING
#else
	if(RSB_MEMCMP(cm1->VA,cm2->VA,RSB_SIZEOF(cm1->typecode)*nnz)) /* This is too strict: for it, -0.0 != 0.0 */
		RSB_GOTO_DIFFERING
#endif
	}


equal:
	return yes;
differing:
#if RSB_ALLOW_STDOUT
#if (RSB_WANT_VERBOSE_MESSAGES || 1)
	/* Debug aid: print a summary of both operands on mismatch. */
	if(cm1)RSB_STDOUT_COO_MATRIX_SUMMARY(cm1);
	if(cm2)RSB_STDOUT_COO_MATRIX_SUMMARY(cm2);
#endif /* RSB_WANT_VERBOSE_MESSAGES */
#endif /* RSB_ALLOW_STDOUT */
	return no;
#undef RSB_GOTO_DIFFERING
}
3322
rsb__is_coo_matrix_empty(const struct rsb_coo_matrix_t * cm,rsb_flags_t flags)3323 static rsb_bool_t rsb__is_coo_matrix_empty(const struct rsb_coo_matrix_t *cm, rsb_flags_t flags)
3324 {
3325 if(!cm)
3326 return RSB_BOOL_FALSE;
3327 if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_UNIT_DIAG_IMPLICIT))
3328 return RSB_BOOL_FALSE;
3329 if(cm->nnz==0)
3330 return RSB_BOOL_TRUE;
3331 return (rsb_check_for_nonzeros(cm->VA,cm->nnz,cm->typecode)==0)?RSB_BOOL_TRUE:RSB_BOOL_FALSE;
3332 }
3333
rsb__are_coo_matrices_both_empty(const struct rsb_coo_matrix_t * cm1,rsb_flags_t flags1,const struct rsb_coo_matrix_t * cm2,rsb_flags_t flags2)3334 rsb_bool_t rsb__are_coo_matrices_both_empty(const struct rsb_coo_matrix_t *cm1, rsb_flags_t flags1, const struct rsb_coo_matrix_t *cm2, rsb_flags_t flags2)
3335 {
3336 rsb_bool_t im1e = RSB_BOOL_FALSE,im2e = RSB_BOOL_FALSE,abme = RSB_BOOL_FALSE;
3337 im1e = rsb__is_coo_matrix_empty(cm1,flags1);
3338 im2e = rsb__is_coo_matrix_empty(cm2,flags2);
3339 abme = RSB_BOOL_OR(im1e,im2e);
3340 return abme;
3341 }
3342
rsb__are_coo_matrices_equal_or_both_empty(const struct rsb_coo_matrix_t * cm1,rsb_flags_t flags1,const struct rsb_coo_matrix_t * cm2,rsb_flags_t flags2)3343 rsb_bool_t rsb__are_coo_matrices_equal_or_both_empty(const struct rsb_coo_matrix_t *cm1, rsb_flags_t flags1, const struct rsb_coo_matrix_t *cm2, rsb_flags_t flags2)
3344 {
3345 rsb_bool_t acme = rsb__are_coo_matrices_equal(cm1,cm2);
3346 if(acme)
3347 ;
3348 else
3349 acme = rsb__are_coo_matrices_both_empty(cm1,flags1,cm2,flags2);
3350 return acme;
3351 }
3352
rsb__get_row_dense(const struct rsb_mtx_t * mtxAp,void * row,rsb_coo_idx_t i)3353 rsb_err_t rsb__get_row_dense(const struct rsb_mtx_t * mtxAp, void* row, rsb_coo_idx_t i )
3354 {
3355 /*!
3356 * \ingroup gr_mops
3357 * Will write entire row i of matrix matrix in the row vector.
3358 *
3359 * \param i the specified row
3360 * \param row an already allocated vector of the same type as the matrix
3361 * \param mtxAp is a valid pointer to a rsb_mtx_t structure
3362 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3363 *
3364 * FIXME: this function is unfinished.
3365 * */
3366 if(!mtxAp)
3367 return RSB_ERR_BADARGS;
3368
3369 RSB_BZERO( row , mtxAp->el_size * mtxAp->nc);
3370
3371 return rsb__do_get_row_dense(mtxAp, row, i );
3372 }
3373
rsb_spmv_unua(const struct rsb_mtx_t * mtxAp,const void * x,void * y,rsb_trans_t transA)3374 rsb_err_t rsb_spmv_unua(const struct rsb_mtx_t * mtxAp, const void * x, void * y, rsb_trans_t transA)
3375 {
3376 /*!
3377 * \ingroup gr_mops
3378 * computes \f$y \leftarrow y - op(A) \cdot x \f$
3379 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3380 *
3381 * */
3382 rsb_aligned_t mone[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];
3383 rsb__util_set_area_to_converted_integer(&mone[0],mtxAp->typecode,-1);
3384 return rsb__do_spmv_general(transA,&mone[0],mtxAp,x,1,NULL,y,1,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
3385 }
3386
rsb_spmv_uaua(const struct rsb_mtx_t * mtxAp,const void * rhs,void * out,rsb_trans_t transA)3387 rsb_err_t rsb_spmv_uaua(const struct rsb_mtx_t * mtxAp, const void * rhs, void * out, rsb_trans_t transA)
3388 {
3389 /*!
3390 * \ingroup gr_mops
3391 * computes \f$y \leftarrow y + op(A) \cdot x \f$
3392 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3393 *
3394 * */
3395 rsb_err_t errval = RSB_ERR_NO_ERROR;
3396 errval = rsb__do_spmv_general(transA,NULL,mtxAp,rhs,1,NULL,out,1,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
3397 RSB_DO_ERR_RETURN(errval)
3398 }
3399
rsb_spmv_uauz(const struct rsb_mtx_t * mtxAp,const void * rhs,void * out,rsb_trans_t transA)3400 rsb_err_t rsb_spmv_uauz(const struct rsb_mtx_t * mtxAp, const void * rhs, void * out, rsb_trans_t transA)
3401 {
3402 /*!
3403 * \ingroup gr_mops
3404 * computes \f$y \leftarrow op(A) \cdot x \f$
3405 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3406 * */
3407
3408 /* FIXME : TEMPORARY */
3409 rsb_aligned_t zero[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];
3410
3411 if(!mtxAp)
3412 return RSB_ERR_BADARGS;
3413 rsb__util_set_area_to_converted_integer(&zero[0],mtxAp->typecode,0);
3414 return rsb__do_spmv_general(transA,NULL,mtxAp,rhs,1,&zero[0],out,1,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
3415 }
3416
#if 0
/* Disabled/unused: general scaled SpMV variant with explicit strides;
 * rsb__do_spmv_general already provides this functionality. Kept for reference. */
static rsb_err_t rsb_spmv_sxsx(const struct rsb_mtx_t * mtxAp, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/*!
	 * \ingroup gr_mops
	 * computes \f$y \leftarrow \beta \cdot y + \alpha\cdot A\cdot x\f$
	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
	 */
	if(!alphap || !betap)
		return RSB_ERR_BADARGS;
	return rsb__do_spmv_general(transA,alphap,mtxAp,x,incx,betap,y,incy,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
}
#endif
3430
3431
3432
struct rsb_mtx_t * rsb__load_matrix_file_as_binary(const rsb_char_t * filename, rsb_err_t *errvalp)
{
	/*!
	 * Loads a matrix from a binary dump file.
	 *
	 * \param filename path of the file to load (required)
	 * \param errvalp  optional: where to store the resulting error code (may be NULL)
	 * \return the loaded matrix, or NULL on error.
	 */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	struct rsb_mtx_t *mtxAp = NULL;

	if(!filename)
	{
		errval = RSB_ERR_BADARGS;
	}
	else
		errval = rsb__do_load_matrix_file_as_binary(&mtxAp,filename);
	/* RSB_CONDITIONAL_ERRPSET tolerates a NULL errvalp, so — unlike the
	 * original — a NULL error pointer is no longer itself a usage error. */
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3449
rsb__do_spsm(rsb_trans_t transT,const void * alphap,const struct rsb_mtx_t * mtxAp,rsb_coo_idx_t nrhs,rsb_flags_t order,const void * betap,const void * b,rsb_nnz_idx_t ldb,void * c,rsb_nnz_idx_t ldc)3450 rsb_err_t rsb__do_spsm(rsb_trans_t transT, const void * alphap, const struct rsb_mtx_t * mtxAp, rsb_coo_idx_t nrhs, rsb_flags_t order, const void * betap, const void * b, rsb_nnz_idx_t ldb, void * c, rsb_nnz_idx_t ldc)
3451 {
3452 /*!
3453 * \ingroup gr_internals
3454 * When x is a multivector with nrhs elements, b elements having stride ldb and c elements having stride ldc
3455 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
3456 * */
3457 /* FIXME : and error detection ? **/
3458 /* FIXME : UNTESTED, UNFINISHED **/
3459 rsb_coo_idx_t l = 0;
3460 rsb_err_t errval = RSB_ERR_NO_ERROR;
3461
3462 #if RSB_ALLOW_ZERO_DIM
3463 if(RSB_ANY_MTX_DIM_ZERO(mtxAp))
3464 {
3465 goto err; /* FIXME: skipping further checks */
3466 }
3467 #endif
3468
3469 if(!mtxAp || !b || !ldb || !c || !ldc || !nrhs /* transA*/ || !alphap || !betap ||
3470 ( order != RSB_FLAG_WANT_COLUMN_MAJOR_ORDER && order != RSB_FLAG_WANT_ROW_MAJOR_ORDER ) )
3471 {
3472 errval = RSB_ERR_BADARGS;
3473 goto err;
3474 }
3475
3476
3477 if( 0
3478 #if RSB_ENABLE_INNER_NRHS_SPSV
3479 || ( rsb_global_session_handle.want_outer_spmm==0 )
3480 #endif
3481 ) /* 0 == yes TODO: need want_outer_spsm here */
3482 {
3483 size_t outnri = ldc, rhsnri = ldb;
3484
3485 if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
3486 RSB_DO_ERROR_CUMULATE(errval,rsb__do_spsv_general(transT,alphap,mtxAp,b, 1,c, 1,RSB_OP_FLAG_DEFAULT RSB_INNER_NRHS_SPSV_ARGS_IDS));
3487 else
3488 RSB_DO_ERROR_CUMULATE(errval,rsb__do_spsv_general(transT,alphap,mtxAp,b,nrhs,c,nrhs,RSB_OP_FLAG_DEFAULT RSB_INNER_NRHS_SPSV_ARGS_IDS));
3489 }
3490 else
3491 {
3492 if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
3493 /* column major */
3494 for(l=0;l<nrhs;++l)
3495 RSB_DO_ERROR_CUMULATE(errval,rsb__do_spsv(transT,alphap,mtxAp, ((const rsb_byte_t*)b)+(mtxAp->el_size*ldb)*l,1, ((rsb_byte_t*)c)+(mtxAp->el_size*ldc)*l,1));
3496 else
3497 /* row major */
3498 for(l=0;l<nrhs;++l)
3499 RSB_DO_ERROR_CUMULATE(errval,rsb__do_spsv(transT,alphap,mtxAp, ((const rsb_byte_t*)b)+(mtxAp->el_size+l),ldb, ((rsb_byte_t*)c)+(mtxAp->el_size+l),ldc));
3500 }
3501 err:
3502 RSB_DO_ERR_RETURN(errval)
3503 }
3504
rsb_err_t rsb__util_sort_row_major_buffered(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_type_t typecode , rsb_flags_t flags, void * WA, size_t wb )
{
	/*!

	Will sort as CSR (or CSC) the given coefficients.
	Will ignore any fancy sorting flags.

	\param \rsb_wr_va_ia_ja_desc_msg
	\param \rsb_flags_inp_param_msg
	\param \rsb_nnz_inp_param_msg
	\param \rsb_nrows_inp_param_msg
	\param \rsb_ncols_inp_param_msg
	\param \rsb_type_param_msg
	\param WA optional scratch buffer for the sort (wb bytes; may be NULL)
	\param wb size of WA in bytes
	\return \rsberrcodemsg
	*/
	// FIXME : should handle error conditions
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	//struct rsb_mtx_partitioning_info_t pinfop;

	if(RSB_MATRIX_UNSUPPORTED_TYPE(typecode))
	{
		errval = RSB_ERR_UNSUPPORTED_TYPE;goto err;
	}
#if 0
	pinfop.rpntr = rsb__util_get_partitioning_array( 1, m , &pinfop.M_b, flags),
	pinfop.cpntr = rsb__util_get_partitioning_array( 1, k , &pinfop.K_b, flags),
	rsb__pinfo_init( &pinfop, pinfop.M_b, pinfop.K_b, pinfop.rpntr, pinfop.cpntr, m,k);/* FIXME : is this ok ? */
	errval = rsb__do_util_sortcoo(VA,IA,JA,m,k,nnz,typecode,&pinfop,flags,NULL,0);
	RSB_CONDITIONAL_FREE(pinfop.rpntr);
	RSB_CONDITIONAL_FREE(pinfop.cpntr);
#else
	//Remove internal support for RSB_FLAG_EXPERIMENTAL_IN_PLACE_PERMUTATION_SORT):
	//RSB_DO_FLAG_SUBST(flags,RSB_INTERNAL_FLAG_CSR_SORTING_MASK,RSB_FLAG_WANT_BCSS_STORAGE|RSB_FLAG_EXPERIMENTAL_IN_PLACE_PERMUTATION_SORT);
	/* Override any fancy sorting flag bits with plain CSR-style sorting. */
	RSB_DO_FLAG_SUBST(flags,RSB_INTERNAL_FLAG_CSR_SORTING_MASK,RSB_FLAG_WANT_BCSS_STORAGE);
	errval = rsb__do_util_sortcoo(VA,IA,JA,m,k,nnz,typecode,NULL,flags,WA,wb);
#endif
err:
	RSB_DO_ERR_RETURN(errval)
}
3544
rsb__basename(const rsb_char_t * path)3545 const rsb_char_t *rsb__basename(const rsb_char_t *path)
3546 {
3547 rsb_int_t sl;
3548
3549 if(!path)
3550 return path;
3551 sl = rsb__util_strlen(path);
3552 while(sl>0 && path[sl-1]!=RSB_DIR_SEPARATOR)
3553 --sl;
3554 return path+sl;
3555 }
3556
rsb_err_t rsb__do_set_elements(struct rsb_mtx_t * mtxAp, const void * VA, const rsb_coo_idx_t *IA, const rsb_coo_idx_t *JA, rsb_nnz_idx_t nnz, rsb_flags_t flags)
{
	/*!
	 * \ingroup gr_internals
	 * Updates nnz elements of mtxAp with the values in VA, at the coordinates
	 * given by IA/JA (1-based when RSB_FLAG_FORTRAN_INDICES_INTERFACE is set
	 * in flags, 0-based otherwise).
	 * \return RSB_ERR_NO_ERROR on success, an OR-cumulated error code otherwise.
	 */
	rsb_coo_idx_t k;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	/* Index offset: 1 for Fortran (one-based) indices, 0 for C indices. */
	rsb_nnz_idx_t ifo = ( flags & RSB_FLAG_FORTRAN_INDICES_INTERFACE )?1:0;

	if(!IA || !VA || !JA || !mtxAp)
	{
		errval = RSB_ERR_BADARGS;
		goto err;
	}

#if RSB_WANT_COO_BEGIN
	/* Matrices still in assembly state are routed to the Sparse BLAS insertion path. */
	if( RSB_MTX_HBDF( mtxAp) )
	{
		errval = rsb__BLAS_Xuscr_insert_entries(RSB_MTX_HBDFH(mtxAp),nnz,VA,IA,JA);
		goto err;
	}
#endif /* RSB_WANT_COO_BEGIN */
	/* Element-wise update; individual element errors are OR-ed together. */
	for(k=0;k<nnz;++k)
		errval |= rsb__do_upd_coo_element(mtxAp,((const rsb_char_t*)VA)+mtxAp->el_size*k,IA[k]-ifo,JA[k]-ifo,flags);
err:
	RSB_DO_ERR_RETURN(errval)
}
3581
rsb__set_ldX_for_spmm(rsb_trans_t transA,const struct rsb_mtx_t * mtxAp,rsb_coo_idx_t nrhs,rsb_flags_t order,rsb_nnz_idx_t * ldBp,rsb_nnz_idx_t * ldCp)3582 rsb_err_t rsb__set_ldX_for_spmm(rsb_trans_t transA, const struct rsb_mtx_t * mtxAp, rsb_coo_idx_t nrhs, rsb_flags_t order, rsb_nnz_idx_t * ldBp, rsb_nnz_idx_t * ldCp)
3583 {
3584 if ( order == RSB_FLAG_WANT_ROW_MAJOR_ORDER )
3585 {
3586 if( !*ldBp )
3587 *ldBp = nrhs;
3588 if( !*ldCp )
3589 *ldCp = nrhs;
3590 }
3591 else
3592 if ( order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER )
3593 {
3594 if( !*ldBp )
3595 {
3596 if ( transA == RSB_TRANSPOSITION_N )
3597 *ldBp = mtxAp->nc;
3598 else
3599 *ldBp = mtxAp->nr;
3600 }
3601 if( !*ldCp )
3602 {
3603 if ( transA == RSB_TRANSPOSITION_N )
3604 *ldCp = mtxAp->nr;
3605 else
3606 *ldCp = mtxAp->nc;
3607 }
3608 }
3609 else
3610 goto err;
3611
3612 if( !(*ldBp) || (!*ldCp) )
3613 goto err;
3614
3615 if ( order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER )
3616 {
3617 if( *ldCp < rsb__do_get_rows_of(mtxAp,transA) || *ldBp < rsb__do_get_columns_of(mtxAp,transA) )
3618 RSB_PERR_GOTO(err,RSB_ERRM_ES);
3619 }
3620 else
3621 if ( order == RSB_FLAG_WANT_ROW_MAJOR_ORDER )
3622 {
3623 if( *ldCp < nrhs || *ldBp < nrhs )
3624 RSB_PERR_GOTO(err,RSB_ERRM_ES);
3625 }
3626
3627 return RSB_ERR_NO_ERROR;
3628 err:
3629 return RSB_ERR_BADARGS;
3630 }
3631
rsb__do_spmm(rsb_trans_t transA,const void * alphap,const struct rsb_mtx_t * mtxAp,rsb_coo_idx_t nrhs,rsb_flags_t order,const void * b,rsb_nnz_idx_t ldb,const void * betap,void * c,rsb_nnz_idx_t ldc,enum rsb_op_flags_t op_flags)3632 rsb_err_t rsb__do_spmm(rsb_trans_t transA, const void * alphap, const struct rsb_mtx_t * mtxAp, rsb_coo_idx_t nrhs, rsb_flags_t order, const void * b, rsb_nnz_idx_t ldb, const void * betap, void * c, rsb_nnz_idx_t ldc, enum rsb_op_flags_t op_flags)
3633 {
3634 /*!
3635 \ingroup rsb_doc_matrix_operations
3636
3637 FIXME: this function name is WRONG
3638
3639 when x is a multivector with nrhs elements, b elements having stride ldb and c elements having stride ldc
3640
3641 \return \rsberrcodemsg
3642 */
3643 /* FIXME : and error detection ?
3644 * e.g.:
3645 if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER && ldb<mtxAp->nr && transA=...)
3646 * **/
3647 /* TODO: incx,incy **/
3648 rsb_coo_idx_t l = 0;
3649 rsb_err_t errval = RSB_ERR_NO_ERROR;
3650 const rsb_coo_idx_t incx = 1,incy = 1;
3651 const rsb_byte_t * bp = b;
3652 rsb_byte_t * cp = c;
3653 rsb_aligned_t pone[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];
3654
3655 #if RSB_ALLOW_ZERO_DIM
3656 if(RSB_ANY_MTX_DIM_ZERO(mtxAp))
3657 goto err; /* FIXME: skipping checks on ldB, ldC, op_flags*/
3658 #endif
3659
3660 if(!mtxAp || !b || !c || !nrhs /* transA*/ ||
3661 ( order != RSB_FLAG_WANT_COLUMN_MAJOR_ORDER && order != RSB_FLAG_WANT_ROW_MAJOR_ORDER ) )
3662 {
3663 errval = RSB_ERR_BADARGS;
3664 goto err;
3665 }
3666
3667 if(!alphap || !betap)
3668 {
3669 rsb__fill_with_ones(&pone[0],mtxAp->typecode,1,1);
3670 if(!alphap)
3671 alphap = &pone[0];
3672 if(!betap)
3673 betap = &pone[0];
3674 }
3675
3676 errval = rsb__set_ldX_for_spmm(transA, mtxAp, nrhs, order, &ldb, &ldc);
3677 if(RSB_SOME_ERROR(errval))
3678 {
3679 errval = RSB_ERR_BADARGS;
3680 goto err;
3681 }
3682
3683 #ifdef RSB_HAVE_OPTYPE_SPMM_AZ
3684 /*
3685 return ...
3686 SPMM_AZ is not yet complete
3687 */
3688 #endif /* RSB_HAVE_OPTYPE_SPMM_AZ */
3689 if( rsb_global_session_handle.want_outer_spmm==0 ) /* 0 == yes */
3690 {
3691 /* inner loop: fast */
3692 if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
3693 RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transA,alphap,mtxAp, bp,incx, betap, cp,incy,op_flags,nrhs,ldc, ldb ));
3694 else
3695 RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transA,alphap,mtxAp, bp,ldb , betap, cp, ldc ,op_flags,nrhs,incx,incy));
3696 }
3697 else
3698 {
3699 /* outer loop: slow */
3700 if(order == RSB_FLAG_WANT_COLUMN_MAJOR_ORDER)
3701 for(l=0;l<nrhs;++l)
3702 RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transA,alphap,mtxAp,bp+(mtxAp->el_size*ldb)*l,incx, betap, cp+(mtxAp->el_size*ldc)*l,incy,op_flags RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS));
3703 else
3704 for(l=0;l<nrhs;++l)
3705 RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transA,alphap,mtxAp,bp+(mtxAp->el_size+l ) ,ldb , betap, cp+(mtxAp->el_size+l ) ,ldc ,op_flags RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS));
3706 }
3707 err:
3708 RSB_DO_ERR_RETURN(errval)
3709
3710 }
3711
rsb__do_spmm_general(const struct rsb_mtx_t * mtxAp,const void * b,void * c,const void * alphap,const void * betap,rsb_coo_idx_t incx,rsb_coo_idx_t incy,rsb_trans_t transA,enum rsb_op_flags_t op_flags,rsb_flags_t order,const rsb_int_t nrhs,const size_t outnri,const size_t rhsnri)3712 rsb_err_t rsb__do_spmm_general(const struct rsb_mtx_t * mtxAp, const void * b, void * c, const void *alphap, const void * betap, rsb_coo_idx_t incx, rsb_coo_idx_t incy, rsb_trans_t transA, enum rsb_op_flags_t op_flags, rsb_flags_t order,const rsb_int_t nrhs, const size_t outnri, const size_t rhsnri)
3713 {
3714 /* */
3715 rsb_err_t errval = RSB_ERR_NO_ERROR;
3716
3717 if(nrhs != 1)
3718 errval = rsb__do_spmm(transA, alphap, mtxAp, nrhs, order, b, rhsnri, betap, c, outnri, op_flags);
3719 else
3720 errval = rsb__do_spmv_general(transA, alphap, mtxAp, b, incx, betap, c, incy, op_flags RSB_OUTER_NRHS_SPMV_ARGS_IDS);
3721 return errval;
3722 }
3723
rsb_err_t rsb__do_transpose(struct rsb_mtx_t ** mtxApp, rsb_bool_t want_conj)
{
	/*
	 * Replaces *mtxApp with a newly assembled transpose of the original
	 * matrix (conjugated too, if want_conj), destroying the original.
	 * The UPLO (triangle) flags are flipped accordingly.
	 */
	// TODO: and what to do if data arrays are externally allocated ?
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	struct rsb_mtx_t*tmatrix = NULL;	/* the transposed matrix, returned via *mtxApp */
	struct rsb_mtx_t*mtxAp = NULL;	/* the original matrix */
	struct rsb_coo_matrix_t coo;	/* temporary COO copy of the whole matrix */
	struct rsb_mtx_t *fm = NULL;	/* first (leaf) submatrix: its arrays get recycled below */

	if(!mtxApp || !*mtxApp)
	{
		errval = RSB_ERR_BADARGS;
		RSB_PERR_GOTO(err, RSB_ERRM_E_MTXAP);
	}
	mtxAp = *mtxApp;
	RSB_DO_FLAG_FLIP_UPLO(mtxAp->flags);	/* triangle flags shall refer to the transposed triangle */
	RSB_INIT_CXX_FROM_MTX(&coo,mtxAp);
	if(rsb__allocate_coo_matrix_t(&coo)!=&coo)
	{
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}
	/* extract all the coefficients of the matrix into coo */
	errval = rsb__do_get_rows_sparse(RSB_TRANSPOSITION_N,NULL,mtxAp,coo.VA,coo.IA,coo.JA,0,mtxAp->nr-1,&coo.nnz,RSB_FLAG_NOFLAGS);
	if(RSB_SOME_ERROR(errval))
	{
		/* NOTE(review): coo is not deallocated on this and the following
		 * error paths — looks like a leak; confirm before changing. */
		goto err;
	}
	fm = rsb__do_get_first_submatrix(mtxAp);
	if(!fm)
		goto err; // FIXME
	if(want_conj)
		errval = rsb__util_do_conjugate(coo.VA,coo.typecode,coo.nnz);
	if(RSB_SOME_ERROR(errval))goto err;
	/* swapping row and column indices (and dimensions) transposes the COO data */
	RSB_SWAP(rsb_coo_idx_t*,coo.IA,coo.JA);
	RSB_SWAP(rsb_coo_idx_t,coo.nr,coo.nc);
	/* copy the transposed COO data back into the first submatrix' arrays */
	RSB_COO_MEMCPY_parallel(fm->VA,fm->bpntr,fm->bindx,coo.VA,coo.IA,coo.JA,0,0,coo.nnz,fm->el_size);
	RSB_DO_FLAG_DEL(mtxAp->flags,RSB_FLAG_SORTED_INPUT);	/* the index swap broke any previous ordering */
	/* assemble the transposed matrix, reusing the first submatrix' arrays */
	tmatrix = rsb__mtx_alloc_inner(fm->VA,fm->bpntr,fm->bindx,coo.nnz,0,0,coo.typecode,coo.nr,coo.nc,RSB_DEFAULT_ROW_BLOCKING,RSB_DEFAULT_COL_BLOCKING,mtxAp->flags,&errval);
	RSB_DO_FLAG_ADD(mtxAp->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS);	/* so destroying mtxAp won't free the arrays now owned by tmatrix */
	rsb__destroy_inner(mtxAp);
	rsb__destroy_coo_matrix_t(&coo);
	*mtxApp = tmatrix;

err:
	RSB_DO_ERR_RETURN(errval)
}
3770
rsb__do_get_elements(const struct rsb_mtx_t * mtxAp,void * VA,const rsb_coo_idx_t * IA,const rsb_coo_idx_t * JA,rsb_nnz_idx_t nnz,rsb_flags_t flags)3771 rsb_err_t rsb__do_get_elements(const struct rsb_mtx_t * mtxAp, void * VA, const rsb_coo_idx_t *IA, const rsb_coo_idx_t *JA, rsb_nnz_idx_t nnz, rsb_flags_t flags)
3772 {
3773 rsb_coo_idx_t k;
3774 rsb_err_t errval = RSB_ERR_NO_ERROR;
3775 rsb_nnz_idx_t ifo = ( flags & RSB_FLAG_FORTRAN_INDICES_INTERFACE )?1:0;
3776
3777 if(!IA || !VA || !JA || !mtxAp)
3778 {
3779 errval = RSB_ERR_BADARGS;
3780 goto err;
3781 }
3782
3783 for(k=0;k<nnz;++k)
3784 errval |= rsb__do_get_coo_element(mtxAp,((rsb_char_t*)VA)+mtxAp->el_size*k,IA[k]-ifo,JA[k]-ifo);
3785 err:
3786 RSB_DO_ERR_RETURN(errval)
3787 }
3788
rsb__do_set_initopt_as_string(const rsb_char_t * opn,const rsb_char_t * arg)3789 rsb_err_t rsb__do_set_initopt_as_string(const rsb_char_t *opn, const rsb_char_t *arg)
3790 {
3791 /* FIXME: document me */
3792 return rsb__stropts_set(opn,arg);
3793 }
3794
rsb__do_lib_get_info_str(int what,rsb_char_t * sbuf,size_t buflen)3795 rsb_err_t rsb__do_lib_get_info_str(int what, rsb_char_t* sbuf, size_t buflen)
3796 {
3797 rsb_err_t errval = RSB_ERR_NO_ERROR;
3798 size_t rl = buflen;
3799
3800 if( sbuf == NULL )
3801 {
3802 errval = RSB_ERR_BADARGS;
3803 goto err;
3804 }
3805
3806 sbuf[0] = RSB_NUL;
3807 #ifdef RSB_CC
3808 snprintf(sbuf+(buflen-rl),rl,"CC=%s ", RSB_CC );
3809 #else /* RSB_CC */
3810 errval |= RSB_ERR_GENERIC_ERROR;
3811 #endif /* RSB_CC */
3812 rl -= strlen(sbuf);
3813 #ifdef RSB_CFLAGS
3814 snprintf(sbuf+(buflen-rl),rl,"CFLAGS=%s",RSB_CFLAGS);
3815 #else /* RSB_CFLAGS */
3816 errval |= RSB_ERR_GENERIC_ERROR;
3817 #endif /* RSB_CFLAGS */
3818 err:
3819 return errval;
3820 }
3821
struct rsb_mtx_t * rsb__do_mtx_alloc_from_coo_const(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * JA, rsb_nnz_idx_t nnzA, rsb_type_t typecode, rsb_coo_idx_t nrA, rsb_coo_idx_t ncA, rsb_blk_idx_t brA, rsb_blk_idx_t bcA, rsb_flags_t flags, rsb_err_t * errvalp)

{
	/*
	 * Builds an RSB matrix from user supplied COO arrays, which are copied
	 * (left untouched, still owned by the caller).
	 * On failure returns NULL and frees the internal copies; the error code
	 * goes to *errvalp (if non-NULL).
	 */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	void *VA_ = NULL;
	rsb_coo_idx_t *IA_ = NULL, *JA_ = NULL;
	struct rsb_mtx_t * mtxAp = NULL;

	/* the externally-allocated flag contradicts this function's copy semantics */
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
	{
		errval = /*RSB_ERR_BADARGS|*/RSB_ERR_COULD_NOT_HONOUR_EXTERNALLY_ALLOCATION_FLAGS;
		RSB_PERR_GOTO(err,RSB_ERRM_CNHEAF);
	}

	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);

	if(nnzA>0)
	{
		rsb_coo_idx_t offi = 0;

		/* Fortran (one based) indices get translated while copying */
		if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
			offi = 1, RSB_DO_FLAG_DEL(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE);
		/* copy the arrays; zero nrA/ncA are computed from the indices */
		errval = rsb__util_coo_alloc_copy_and_stats(&VA_,&IA_,&JA_,VA,IA,JA,nrA?NULL:&nrA,ncA?NULL:&ncA,nnzA,0,typecode,offi,0,RSB_FLAG_NOFLAGS,&flags);

		/* NOTE(review): errval from the call above is only diagnosed
		 * indirectly, through the following NULL checks. */
		if(!VA_ || !IA_ || !JA_)
		{
			errval = RSB_ERR_ENOMEM;
			RSB_PERR_GOTO(err,RSB_ERRM_E_VIJ);
		}
	}
	else
	{
#if !RSB_ALLOW_EMPTY_MATRICES
		/* FIXUP CASE FOR 0-NNZ MATRICES AND IMPLICIT DIAGONAL */
		if(RSB_INVALID_NNZ_COUNT_FOR_FLAGS(nnzA,flags))
		{
			errval = RSB_ERR_BADARGS;
			RSB_PERR_GOTO(err,RSB_ERRM_CBAEM);
		}
#endif /* RSB_ALLOW_EMPTY_MATRICES */
	}
	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);

	/* ownership of VA_/IA_/JA_ passes to mtxAp on success */
	mtxAp = rsb__mtx_alloc_inner(VA_,IA_,JA_,nnzA,0,0,typecode,nrA,ncA,brA,bcA,flags,&errval);
	if(mtxAp && errval == RSB_ERR_NO_ERROR)
		goto ok;
	/* FIXME: and if !matrix but errval ? */
err:
	RSB_CONDITIONAL_FREE(IA_);
	RSB_CONDITIONAL_FREE(JA_);
	RSB_CONDITIONAL_FREE(VA_);
ok:
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3877
struct rsb_mtx_t * rsb__do_mtx_alloc_from_coo_inplace(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnzA, rsb_type_t typecode, rsb_coo_idx_t nrA, rsb_coo_idx_t ncA, rsb_blk_idx_t brA, rsb_blk_idx_t bcA, rsb_flags_t flags, rsb_err_t * errvalp)
{
	/*
	 * Assembles an RSB matrix reusing the caller's COO arrays in place
	 * (no copy): they remain owned by the caller, which is recorded via
	 * RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS.
	 * On failure returns NULL; the error code goes to *errvalp (if non-NULL).
	 */
	struct rsb_mtx_t * mtxAp = NULL;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_coo_idx_t roff = 0, coff = 0;

	RSB_ASSERT(VA || nnzA == 0 );
	RSB_ASSERT(IA || nnzA == 0 );
	RSB_ASSERT(JA || nnzA == 0 );

	if(RSB_MATRIX_UNSUPPORTED_TYPE(typecode))
	{
		errval = RSB_ERR_UNSUPPORTED_TYPE;
		RSB_PERR_GOTO(err,RSB_ERRM_EM);
	}

	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);
	RSB_DO_FLAG_ADD(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS);

	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
	{
		/* one based input: shift indices down during assembly */
		roff = -1;
		coff = -1;
		RSB_DO_FLAG_DEL(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE);
	}

	mtxAp = rsb__mtx_alloc_inner(VA,IA,JA,nnzA,roff,coff,typecode,nrA,ncA,brA,bcA,flags,&errval);
err:
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3902
struct rsb_mtx_t * rsb__do_mtx_alloc_from_csr_const(const void *VA, const rsb_coo_idx_t * RP, const rsb_coo_idx_t * JA, rsb_nnz_idx_t nnzA, rsb_type_t typecode, rsb_coo_idx_t nrA, rsb_coo_idx_t ncA, rsb_blk_idx_t brA, rsb_blk_idx_t bcA, rsb_flags_t flags, rsb_err_t * errvalp)
{
	/*
	 * Builds an RSB matrix from user supplied CSR arrays (row pointers RP,
	 * column indices JA, values VA), which are copied and left untouched.
	 * On failure returns NULL and frees the internal copies; the error code
	 * goes to *errvalp (if non-NULL).
	 */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	void *VA_ = NULL;
	rsb_coo_idx_t *IA_ = NULL,*JA_ = NULL;
	struct rsb_mtx_t * mtxAp = NULL;
	size_t cnnz = RSB_MAX(nnzA,nrA+1), cis = sizeof(rsb_coo_idx_t),nvs = RSB_SIZEOF(typecode);
	rsb_coo_idx_t roff = 0,coff = 0;

	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);

	/* the externally-allocated flag contradicts this function's copy semantics */
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
	{
		errval = /*RSB_ERR_BADARGS|*/RSB_ERR_COULD_NOT_HONOUR_EXTERNALLY_ALLOCATION_FLAGS;
		RSB_PERR_GOTO(err,RSB_ERRM_BFEANS);
	}

	/* clone with extra room: the copies may need to hold cnnz entries */
	IA_ = rsb__clone_area_with_extra(RP,cis*(nrA+1),0,cis*(cnnz-(nrA+1)));
	JA_ = rsb__clone_area_with_extra(JA,cis*(nnzA ),0,cis*(cnnz-nnzA));
	VA_ = rsb__clone_area_with_extra(VA,nvs*(nnzA ),0,nvs*(cnnz-nnzA));

	if(!VA_ || !IA_ || !JA_)
	{
		errval = RSB_ERR_ENOMEM;
		RSB_PERR_GOTO(err,RSB_ERRM_E_VIJ);
	}
	/* expand RP's row pointers into per-nonzero row indices, into IA_ */
	errval = rsb__util_uncompress_row_pointers_array((rsb_coo_idx_t*)RP,nrA,flags,RSB_FLAG_C_INDICES_INTERFACE,IA_);

	if(RSB_SOME_ERROR(errval))
	{
		RSB_PERR_GOTO(err,RSB_ERRM_EM);
	}
	if( RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
		coff = -1, RSB_DO_FLAG_DEL(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE);

	RSB_DEBUG_ASSERT(roff>=-1 && coff>=-1); /* for Fortran */
	RSB_DO_FLAG_ADD(flags,RSB_FLAG_SORTED_INPUT);
	/* Bug fix: pass &errval here (not errvalp), as the later
	 * RSB_CONDITIONAL_ERRPSET would otherwise clobber an error set by
	 * rsb__mtx_alloc_inner with RSB_ERR_NO_ERROR.
	 * Ownership of VA_/IA_/JA_ passes to mtxAp on success. */
	mtxAp = rsb__mtx_alloc_inner(VA_,IA_,JA_,nnzA,roff,coff,typecode,nrA,ncA,brA,bcA,flags,&errval);
	if(mtxAp && errval == RSB_ERR_NO_ERROR)
		goto ok;
err:
	/* bug fix: the clones used to leak on the error paths
	 * (same pattern as in rsb__do_mtx_alloc_from_coo_const) */
	RSB_CONDITIONAL_FREE(IA_);
	RSB_CONDITIONAL_FREE(JA_);
	RSB_CONDITIONAL_FREE(VA_);
ok:
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3945
struct rsb_mtx_t * rsb__do_mtx_alloc_from_csc_const(const void *VA, const rsb_coo_idx_t * IA, const rsb_coo_idx_t * CP, rsb_nnz_idx_t nnzA, rsb_type_t typecode, rsb_coo_idx_t nrA, rsb_coo_idx_t ncA, rsb_blk_idx_t brA, rsb_blk_idx_t bcA, rsb_flags_t flags, rsb_err_t * errvalp)
{
	/*
	 * Builds an RSB matrix from user supplied CSC arrays (row indices IA,
	 * column pointers CP, values VA), which are converted to CSR into
	 * freshly allocated work arrays and left untouched.
	 * On failure returns NULL and frees the work arrays; the error code
	 * goes to *errvalp (if non-NULL).
	 */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	void *VA_ = NULL;
	rsb_coo_idx_t *IA_ = NULL,*JA_ = NULL;
	struct rsb_mtx_t * mtxAp = NULL;
	rsb_nnz_idx_t maxdim = RSB_MAX(nnzA,RSB_MAX(nrA+1,ncA+1));

	RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(flags);

	/* the externally-allocated flag contradicts this function's copy semantics */
	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS))
	{
		errval = /*RSB_ERR_BADARGS|*/RSB_ERR_COULD_NOT_HONOUR_EXTERNALLY_ALLOCATION_FLAGS;
		RSB_PERR_GOTO(err,RSB_ERRM_EM);
	}

	if(nnzA>0)
	{
		rsb_coo_idx_t offi = 0;

		if(RSB_SOME_ERROR(errval = rsb__util_coo_alloc(&VA_,&IA_,&JA_,maxdim,typecode,RSB_BOOL_FALSE)))
		{
			RSB_PERR_GOTO(err,RSB_ERRM_EM);
		}
		if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
			offi = 1, RSB_DO_FLAG_DEL(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE);
		/* NOTE(review): the return status of the conversion is not
		 * captured here (historically it never was); confirm its
		 * signature before capturing it into errval.
		 * FIXME: assembly shall give the user chance to pass offo and offi */
		rsb_util_csc2csr(VA,IA,CP,VA_,IA_,JA_,nrA,ncA,nnzA,typecode,offi,0,&flags);
	}
	if(RSB_SOME_ERROR(errval))
	{
		RSB_PERR_GOTO(err,RSB_ERRM_EM);
	}
	RSB_DO_FLAG_ADD(flags,RSB_FLAG_SORTED_INPUT);
	/* Bug fix: pass &errval here (not errvalp), as the later
	 * RSB_CONDITIONAL_ERRPSET would otherwise clobber an error set by
	 * the in-place constructor with RSB_ERR_NO_ERROR. */
	mtxAp = rsb_mtx_alloc_from_csr_inplace (VA_,IA_,JA_,nnzA,typecode,nrA,ncA,brA,bcA,flags,&errval);
	if(mtxAp)
		RSB_DO_FLAG_DEL(mtxAp->flags,RSB_FLAG_EXTERNALLY_ALLOCATED_ARRAYS); /* mtxAp takes ownership of the work arrays */
err:
	if(!mtxAp)
	{
		/* bug fix: on failure the work arrays are still ours —
		 * free them instead of leaking (NULL-safe) */
		RSB_CONDITIONAL_FREE(IA_);
		RSB_CONDITIONAL_FREE(JA_);
		RSB_CONDITIONAL_FREE(VA_);
	}
	RSB_CONDITIONAL_ERRPSET(errvalp,errval);
	return mtxAp;
}
3994
3995
3996 /* @endcond */
3997