1 /*
2 
3 Copyright (C) 2008-2021 Michele Martone
4 
5 This file is part of librsb.
6 
7 librsb is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11 
12 librsb is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 License for more details.
16 
17 You should have received a copy of the GNU Lesser General Public
18 License along with librsb; see the file COPYING.
19 If not, see <http://www.gnu.org/licenses/>.
20 
21 */
22 /* @cond INNERDOC */
23 /**
24  * @author Michele Martone
25  * @file
26  * @brief Low level routines and tools for our sparse matrix formats implementations.
27  */
28 #ifndef RSB_COMMON_H_INCLUDED
29 #define RSB_COMMON_H_INCLUDED
30 
31 #ifdef __cplusplus
32 extern "C" {
33 #endif /* __cplusplus */
34 #ifdef __cplusplus
35 #define restrict	/* for now, the restrict keyword is not allowed in C++ */
36 #endif  /* __cplusplus */
37 /**
38  *
39  * VBR internals the user shouldn't use, no way.
40  * This file contents are not meant to be used as an API (Application Programming Interface).
41  *
42  * Manipulate this file at your own risk.
43  *
44  */
45 #include <stdlib.h>	/* bsearch, calloc, malloc */
46 #include <stdio.h>	/* printf */
47 
48 #ifdef HAVE_CONFIG_H	/* hopefully, the only non-RSB_ prefixed symbol of ours */
49 #define RSB_HAVE_CONFIG_H HAVE_CONFIG_H
50 #endif /* HAVE_CONFIG_H */
51 
52 #ifdef RSB_HAVE_CONFIG_H		/* autotools makefiles define this */
53 #include "rsb-config.h"		/* this should be the first include */
54 #endif /* RSB_HAVE_CONFIG_H */
55 #if RSB_WANT_OMP_RECURSIVE_KERNELS
56 #include <omp.h>
57 #endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
58 #ifdef RSB_HAVE_UNISTD_H
59 #include <unistd.h>	/* getopt, gethostname (some system don't have it here!) */
60 #endif /* RSB_HAVE_UNISTD_H */
61 #ifdef RSB_HAVE_GETOPT_H
62 /* RSB_HAVE_GETOPT_LONG */
63 #include <getopt.h>	/* getopt_long is not always available (e.g.: on AIX, or any non GNU system) */
64 typedef struct option rsb_option;
65 #else /* RSB_HAVE_GETOPT_H  */
66 /*#undef required_argument*/
67 /*#undef no_argument*/
68 /*#undef optional_argument*/
69 #define required_argument	0 /* ONLY A STUB */
70 #define no_argument 		1 /* ONLY A STUB */
71 #define optional_argument	2 /* ONLY A STUB */
72        extern char *optarg;
73        extern int optind, opterr, optopt;
74 
75            struct rsb_option_struct {
76                const char *name;
77                int         has_arg;
78                int        *flag;
79                int         val;
80            };
81 /*!
82  * \ingroup gr_internals
83  * \brief An internal, helper structure.
84  */
85 typedef struct rsb_option_struct rsb_option;
86 #endif /* RSB_HAVE_GETOPT_H */
87 #include <ctype.h>	/*isdigit*/
88 
89 /*
90  * Comment / uncomment the following to your likes
91  */
92 #define RSB_WITH_MM
93 /*#undef RSB_WITH_MM*/
94 
95 #ifdef RSB_WITH_MM
96 #include "rsb_mmio.h"
97 #endif /* RSB_WITH_MM */
98 
99 
100 /* the NDEBUG and DEBUG symbols will affect lot of checking code */
101 /* these flags are NOT supported : they should be debugged :) */
102 /*#define NDEBUG 1*/
103 /*#undef  NDEBUG*/
104 
105 /*
106 #ifdef NDEBUG
107 #define DEBUG 1
108 #endif
109 */
110 
111 /*!
112  \internal
113  For all the situations where 'int' would be used.
114  */
115 typedef int rsb_int;
116 
117 /* some ill situations could arise the need for this */
118 #if 0
119 #ifndef NULL
120 #define NULL ((VOID *)(0))
121 #endif
122 #endif
123 #define RSB_NUL '\0'
124 
125 /*  #define RSB_INLINE inline	*/ /* experimental */
126 #define RSB_INLINE 	/* experimental */
127 
128 /* if defined, less partitioning arrays allocations will occur for plain CSR and CSC matrices (EXPERIMENTAL) */
129 #define RSB_WANT_EXPERIMENTAL_IN_PLACE_RECURSIVE	 0	/** EXPERIMENTAL */
130 #define RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS 1	/** EXPERIMENTAL: should avoid extra (non BCSR-related) arrays*/
131 #define RSB_WANT_EXPERIMENTAL_FILLIN_ESTIMATOR 2		/* */
132 
133 #define RSB_WANT_INDEX_BASED_SORT 1	/**< if defined, will enable a faster index+permutation based sorting (EXPERIMENTAL) */
134 #define RSB_WANT_Z_SORT 1		/**< if 1, enable Z sorting at all */
135 #define RSB_FORTRAN_VERBOSE_CALLS 0		/**< */
136 #define RSB_EXPERIMENTAL_WANT_PURE_BCSS 0	/**< EXPERIMENTAL : for BCSR, will prevent from allocating VBR arrays */
137 #define RSB_EXPERIMENTAL_USE_PURE_BCSS  1	/**< EXPERIMENTAL : for BCSR, will prevent from using VBR arrays   */
138 #define RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR  1	/**< EXPERIMENTAL : for BCSR, will prevent from using VBR arrays   */
139 
140 #define RSB_WANT_OMP_RECURSIVE_SPSV						1 	/**< EXPERIMENTAL  */
141 #define RSB_WANT_OMP_RECURSIVE_SPMV						1 	/**< EXPERIMENTAL  */
142 #define RSB_EXPERIMENTAL_FORCE_ROW_SUBDIVISIONS_UNTIL_CORES_NUMBER	   	0 	/**< EXPERIMENTAL  */
143 #define RSB_EXPERIMENTAL_ONE_SINGLE_LOCK_FOR_ALL_SUBMATRICES		   	(1&&RSB_WANT_OMP_RECURSIVE_KERNELS) 	/**< EXPERIMENTAL: incompatible with the prev.  */
144 
145 #define RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT 1
146 #define RSB_ALLOW_EMPTY_MATRICES 1
147 
148 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS   	(1&&RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT ) 	/**< EXPERIMENTAL  */
149 
150 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS2   	(1&&RSB_WANT_OMP_RECURSIVE_KERNELS) 	/**< EXPERIMENTAL  */
151 /* mutually exclusive options for : RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS2   	 */
152 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_WITHOUT_BLOCKING		   	1 	/**< EXPERIMENTAL  */
153 #define RSB_EXPERIMENTAL_ALTERNATING_SUBMATRIX_HEURISTIC			0	/**< EXPERIMENTAL  */
154 #define RSB_EXPERIMENTAL_WORK_BALANCING_HEURISTIC				1	/**< EXPERIMENTAL  */
155 
156 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_LINKED_LIST	0 	/**< EXPERIMENTAL  */
157 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_TREE		(0) 	/**< The standard mechanism, 2-partitioned.  */
158 #define RSB_SHOULD_FAIL_INIT_IF_MEMHIER_DETECTION_FAILS		1 	/**<  */
159 
160 #define RSB_EXPERIMENTAL_NO_SUBDIVIDE_ON_MIN_NNZ_PER_ROW_OR_COLUMN		1 	/**< Block matrix subdivision under a threshold */
161 #define RSB_EXPERIMENTAL_ROWS_SUBDIVIDE_TO_CORES_NUM				1 	/**< Subdivide to obtain no less matrices than cores */
162 #define RSB_CONST_MIN_NNZ_PER_ROW_OR_COLUMN_PER_SUBMATRIX				3	/**< Lower threshold for nnz/m or nnz/k, for subdivision, should be determined heuristically  */
163 
164 #define RSB_EXPERIMENTAL_WANT_BEST_TIMES   1 	/**< EXPERIMENTAL  */
165 #define RSB_EXPERIMENTAL_UNLIMITED_RECURSION  		0 	/**< EXPERIMENTAL  */
166 #define RSB_EXPERIMENTAL_MORTON_ORDERED_RECURSION  	0 	/**< EXPERIMENTAL, UNFINISHED, just for demo purposes  */
167 
168 #define RSB_EXPERIMENTAL_QUAD_DIVISION_POLICY_NAIVE 		0	/**< EXPERIMENTAL */
169 #define RSB_EXPERIMENTAL_QUAD_DIVISION_POLICY_UNSYMMETRIC 	1	/**< EXPERIMENTAL, UNIMPLEMENTED */
170 #define RSB_EXPERIMENTAL_QUAD_DIVISION_POLICY 			RSB_EXPERIMENTAL_QUAD_DIVISION_POLICY_NAIVE
171 
172 #define RSB_CONST_IMPOSSIBLY_BIG_TIME   		1000000000 	/**< in seconds, used when computing 'minimum' running times. any measured time interval should be less than RSB_CONST_IMPOSSIBLY_BIG_TIME.  */
173 #define RSB_MIN_ABOVE_INF(X,Y,MIN) RSB_MAX(RSB_MIN(X,Y),MIN)
174 #define RSB_CONST_TIME_FOR_MICRO_BENCHMARK   		0.1 	/**<  */
175 #define RSB_CONST_MIN_TIMES_FOR_MICRO_BENCHMARK   	10 	/**<  */
176 
177 #define RSB_EXPERIMENTAL_EXPAND_SYMMETRIC_MATRICES_BY_DEFAULT	0	/**< EXPERIMENTAL */
178 #define RSB_WANT_BOUNDED_BOXES 			1
179 #define RSB_WANT_BOUNDED_BOXES_SPMV 			(RSB_WANT_BOUNDED_BOXES && 1)
180 #define RSB_WANT_BOUNDED_BOXES_SPSV 			(RSB_WANT_BOUNDED_BOXES && 1)
181 #define RSB_WANT_EARLY_PARALLEL_REGION_JUMPOUT_SPMV	1
182 #define RSB_WANT_SM_TO_THREAD_MOD_MAPPING		1
183 #define RSB_WANT_EARLY_PARALLEL_REGION_JUMPOUT_SPSV	1
184 
185 #define RSB_WANT_BITMAP 0
186 	/** if RSB_WANT_BITMAP, should att to rsb_mtx_t:
187 	 * auxiliary data structures */
188 	/*struct rsb_options_t *options;*/	/* FIXME: deprecated -- will be deleted soon */
189 
190 #define RSB_WANT_DEBUG_VERBOSE_INTERFACE_NOTICE	(/*RSB_OUT_ERR_VERBOSITY>0 &&*/ RSB_INT_ERR_VERBOSITY>0)
191 #define RSB_WANT_PERFORMANCE_FILE	0
192 #if defined(RSB_HAVE_SIGNAL_H) /* && defined(RSB_HAVE_BITS_SIGACTION_H) */
193 #define RSB_WANT_ACTION 1
194 #else
195 #define RSB_WANT_ACTION 0
196 #endif
197 /* #define RSB_WANT_ACTION RSB_ALLOW_INTERNAL_GETENVS */ /* 1 */
198 #if RSB_WANT_ACTION
199 #define RSB_SHALL_QUIT ( rsb__quit_rsbench != 0 )
200 #define RSB_INTERNALS_RSBENCH_HEAD_DECLS extern int rsb__quit_rsbench;
201 void rsb__sigh(int signal);
202 #define RSB_SIGHR rsb__sigr();
203 #else /* RSB_WANT_ACTION */
204 #define RSB_SHALL_QUIT ( 0 != 0 )
205 #define RSB_INTERNALS_RSBENCH_HEAD_DECLS
206 #define RSB_SIGHR
207 #endif /* RSB_WANT_ACTION */
208 
209 #include "rsb_struct.h"		/* */
210 
211 /* @cond INNERDOC */
212 /*!
213  * \internal
214  * \ingroup gr_internals
215  * \brief An internal, helper structure (OBSOLETE).
216  *
217  * A rsb_options_t structure could keep track of helper information
218  * like :
219  *
220  * - desired nonzero pattern
221  * - pointers to optimal or desired functions for certain operations
222  * - ..
223  * - .. data which is perfectly optional
224  * */
225 struct rsb_options_t{
226 	/** An auxiliary bitmap */
227 	rsb_bitmap_data_t *bitmap;
228 	double a;
229 };
230 /* @endcond */
231 
232 
233 #if  RSB_FORTRAN_VERBOSE_CALLS
234 #define  RSB_FORTRAN_VERBOSE_CALL(M) RSB_ERROR(M)
235 #else /* RSB_FORTRAN_VERBOSE_CALLS */
236 #define  RSB_FORTRAN_VERBOSE_CALL(M)
237 #endif /* RSB_FORTRAN_VERBOSE_CALLS */
238 
239 
240 /*                                DEBUG FLAGS                                */
241 /*
242 	Enable any combination of the following flags to activate debug code.
243 	Do this only when debugging/developing, because it will slow down the code a lot,
244 */
245 
246 #define RSB_WANT_DEBUG_PARANOID_ASSERTS 0	/**< if 1, will activate a number of assert() calls which won't change the code flow but will check for anomalous error conditions */
247 #if RSB_WANT_DEBUG_PARANOID_ASSERTS
248 #define RSB_DEBUG_ASSERT(e) assert(e)
249 #else /* RSB_WANT_DEBUG_PARANOID_ASSERTS */
250 #define RSB_DEBUG_ASSERT(e)
251 #endif /* RSB_WANT_DEBUG_PARANOID_ASSERTS */
252 
253 #define RSB_ASSERT(e) assert(e)		/* NOTE: in the future, could be replaced with some {if(..)goto err;} or exit()-like statement  */
254 
255 /* commented out 20120915, since it was not used anyway most of the time
256 #ifdef DEBUG
257 #define RSB_DEBUG_BITMAP 1
258 #define RSB_DEBUG_BLOCK_STUFF 1
259 #define RSB_DEBUG_SORT_STUFF 1
260 #endif
261 */
262 
263 /*#define RSB_QUIET 1*/
264 
265 /* FIXME : TODO : join these macros as a single debug flag */
266 #define RSB_MEM_DEBUG 		0	/* if 1, will make trigger printouts on allocations and deallocations */
267 #define RSB_QUIET_MEM_ERRORS	0	/* if 1, will not even print out fatal error conditions */
268 
269 /*                            END DEBUG FLAGS                                */
270 
271 
272 /** Macros to check basic indices values validity ( FIXME : unfinished, should be much stricter )  */
273 #define RSB_INVALID_COO_INDEX(I)	((I)>RSB_MAX_MATRIX_DIM || (I)<0)	/* should fail only if signed */
274 #define RSB_INVALID_NNZ_INDEX(I)	((I)>RSB_MAX_MATRIX_NNZ)
275 #define RSB_INVALID_BLK_INDEX(I)	(RSB_INVALID_COO_INDEX(I))
276 #define RSB_INVALID_NNZ_COUNT(I)	((I)<1L || ( RSB_NNZ_ADD_OVERFLOW((I),RSB_INDEX_OF_SAFE_EXTRA) ))
277 #define RSB_INVALID_NNZ_COUNT_FOR_FLAGS(I,F) ((!RSB_DO_FLAG_HAS((F),RSB_FLAG_UNIT_DIAG_IMPLICIT)) && RSB_INVALID_NNZ_COUNT(I))
278 
279 #define RSB_DO_FLAG_HAVE_AND(V1,V2,F) RSB_BOOL_AND(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
280 #define RSB_DO_FLAG_HAVE_NAND(V1,V2,F) RSB_BOOL_NAND(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
281 #define RSB_DO_FLAG_HAVE_XOR(V1,V2,F) RSB_BOOL_XOR(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
282 #define RSB_DO_FLAG_HAVE_OR(V1,V2,F) RSB_BOOL_OR(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
283 #define RSB_DO_FLAG_HAVE_NOR(V1,V2,F) RSB_BOOL_NOR(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
284 #define RSB_DO_FLAG_SUBST(FLAGSVAR,FLAGS_OLD,FLAGS_NEW) RSB_DO_FLAG_DEL(FLAGSVAR,(FLAGS_OLD)), RSB_DO_FLAG_ADD(FLAGSVAR,(FLAGS_NEW))
285 
286 #define RSB_INVALID_COO_COUNT(I)	((I)<1L || ( RSB_COO_ADD_OVERFLOW((I),RSB_INDEX_OF_SAFE_EXTRA) ))
287 #define RSB_IS_VALID_NNZ_COUNT(I)	(!RSB_INVALID_NNZ_COUNT(I))
288 #define RSB_IS_VALID_COO_INDEX(I)	(!RSB_INVALID_COO_INDEX(I))
289 #define RSB_IS_VALID_COO_DIM(I)		(!RSB_INVALID_COO_COUNT(I))
290 #define RSB_IS_VALID_BLK_INDEX(I)	(!RSB_INVALID_BLK_INDEX(I))
291 #define RSB_IS_VALID_NNZ_INDEX(I)	(!RSB_INVALID_NNZ_INDEX(I))
292 #define RSB_IS_VALID_INCX_VALUE(I)	(!RSB_INVALID_NNZ_COUNT(I))
293 #define RSB_ARE_VALID_MATRIX_INIT_PARS(R,C,NNZ,TYPE) (	\
294 	RSB_IS_VALID_NNZ_COUNT(NNZ)&&			\
295 	RSB_IS_VALID_COO_INDEX(R)&&			\
296 	RSB_IS_VALID_COO_INDEX(C)&&			\
297 	(!RSB_MATRIX_UNSUPPORTED_TYPE(TYPE)) )
298 /*#define RSB_IS_VALID_NNZ_SUM(NZ1,NZ2)	RSB_IS_VALID_NNZ_COUNT(((size_t)(NZ1))+((size_t)(NZ2)))*/
299 #define RSB_IS_VALID_NNZ_SUM(NZ1,NZ2)	(((size_t)(RSB_MAX_MATRIX_NNZ))>=(((size_t)(NZ1))+((size_t)(NZ2))))
300 #define RSB_IS_INVALID_TYPE_SIZE(TS) ((TS)<1)
301 
302 #define RSB_IS_VALID_TRANS(T)  ((T)>=RSB_MIN(RSB_MIN(RSB_TRANSPOSITION_T,RSB_TRANSPOSITION_C),(RSB_TRANSPOSITION_N)) && (T)<=RSB_MAX(RSB_MAX(RSB_TRANSPOSITION_T,RSB_TRANSPOSITION_C),(RSB_TRANSPOSITION_N))) /* */
303 #define RSB_IS_VALID_THREAD_COUNT(C)	((C)> 0 && (C)<=RSB_CONST_MAX_SUPPORTED_CORES)	/* */
304 #define RSB_IS_VALID_THREAD_SPEC(C)	((C)>=0 && (C)<=RSB_CONST_MAX_SUPPORTED_CORES)	/* */
305 
306 /** An initializer value for index variables. */
307 #define RSB_INI ((rsb_coo_idx_t)(-1))
308 
309 
310 #include <stdlib.h>		/* basic types and functions definitions */
311 
312 /*
313  * Bitmap stuff macros and functions.
314  * Uses row major order by default.
315  * By defining RSB_BITMAP_ROW_MAJOR_ORDER, row major order will be adopted.
316  *
317  * p.s.: please DO NOT use the following fixed macros outside the nearby macros.
318  * */
319 /*#define RSB_BITMAP_ROW_MAJOR_ORDER 1*/
320 #define RSB_BITS_PER_INT  	(sizeof(rsb_bitmap_data_t)*RSB_CHAR_BIT)
321 #define RSB_BITS_PER_ROW(cols)  ((cols)+(RSB_BITS_PER_INT-1))
322 #define RSB_BYTES_PER_ROW(cols) ((cols+(RSB_CHAR_BIT-1))/RSB_CHAR_BIT)
323 #define RSB_INTS_PER_ROW(cols)   ((cols+((RSB_BITS_PER_INT)-1))/(RSB_BITS_PER_INT))
324 #define RSB_INT_IN_ROW(cols)   ((cols)/(RSB_BITS_PER_INT))
325 
326 /* note that this is (logically) machine independent code */
327 #define RSB_SET_BIT(p,b)  (*(rsb_bitmap_data_t*)(p))=(*(rsb_bitmap_data_t*)(p)|(1<<(b)))
328 #define RSB_UNSET_BIT(p,b)  (*(rsb_bitmap_data_t*)(p))=(*(rsb_bitmap_data_t*)(p)&~(1<<(b)))
329 #define RSB_GET_BIT(p,b)  ((*(rsb_bitmap_data_t*)(p))&(1<<(b)))
330 #define RSB_BITMAP_POINTER(p,rw,r,c) (((rsb_bitmap_data_t*)(p))+(RSB_INTS_PER_ROW(rw)*(r)+RSB_INT_IN_ROW(c)))
331 
332 #ifdef RSB_BITMAP_ROW_MAJOR_ORDER
333 /* Note that only a swap in the following three macros is needed to switch the storage format of our bitmap */
334 #define RSB_BITMAP_GET(p,rows,cols,r,c) RSB_GET_BIT((RSB_BITMAP_POINTER((p),(rows),(c),(r))),((r)%(RSB_BITS_PER_INT)))
335 #define RSB_BITMAP_SET(p,rows,cols,r,c) RSB_SET_BIT((RSB_BITMAP_POINTER((p),(rows),(c),(r))),((r)%(RSB_BITS_PER_INT)))
336 #define RSB_BITMAP_UNSET(p,rows,cols,r,c) RSB_UNSET_BIT((RSB_BITMAP_POINTER((p),(rows),(c),(r))),((r)%(RSB_BITS_PER_INT)))
337 #define RSB_BYTES_PER_BITMAP_(ld,d) (sizeof(rsb_bitmap_data_t)*(RSB_INTS_PER_ROW(ld)) * (d))
338 #define RSB_BITMAP_CLEAR(p,rows,cols)	RSB_BZERO((p),(RSB_BYTES_PER_BITMAP_(cols,rows)))
339 #else /* RSB_BITMAP_ROW_MAJOR_ORDER */
340 #define RSB_BITMAP_GET(p,rows,cols,r,c) RSB_GET_BIT((RSB_BITMAP_POINTER((p),(cols),(r),(c))),((c)%(RSB_BITS_PER_INT)))
341 #define RSB_BITMAP_SET(p,rows,cols,r,c) RSB_SET_BIT((RSB_BITMAP_POINTER((p),(cols),(r),(c))),((c)%(RSB_BITS_PER_INT)))
342 #define RSB_BITMAP_UNSET(p,rows,cols,r,c) RSB_UNSET_BIT((RSB_BITMAP_POINTER((p),(cols),(r),(c))),((c)%(RSB_BITS_PER_INT)))
343 #define RSB_BYTES_PER_BITMAP_(d,ld) (sizeof(rsb_bitmap_data_t)*(RSB_INTS_PER_ROW(ld)) * (d))
344 #define RSB_BITMAP_CLEAR(p,rows,cols)	RSB_BZERO((p),(RSB_BYTES_PER_BITMAP_(rows,cols)))
345 #endif /* RSB_BITMAP_ROW_MAJOR_ORDER */
346 #define RSB_WORDS_PER_BITMAP(ld,d) ((RSB_BYTES_PER_BITMAP_(ld,d)+(sizeof(rsb_bitmap_data_t)-1))/sizeof(rsb_bitmap_data_t))
347 
348 #define RSB_BYTES_PER_BITMAP(rows,cols) RSB_BYTES_PER_BITMAP_(rows,cols)
349 #define RSB_BLOCK_UNSET_BIT_FOR_NNZ(IA,JA,k,M) {rsb_coo_idx_t i=(RSB_GET_BLOCK_ROW_FOR_NZ(IA,(M))); rsb_coo_idx_t j=(RSB_GET_BLOCK_COL_FOR_NZ(JA,M)); RSB_BITMAP_UNSET((M)->options->bitmap,(M)->M_b,(M)->K_b,i,j);}
350 #define RSB_BLOCK_SET_BIT_FOR_NNZ(IA,JA,k,M) {rsb_coo_idx_t i=(RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,M)); rsb_coo_idx_t j=(RSB_GET_BLOCK_COL_FOR_NZ(JA+k,M)); RSB_BITMAP_SET((M)->options->bitmap,(M)->M_b,(M)->K_b,i,j);}
351 #define RSB_BLOCK_GET_BIT_FOR_NNZ(IA,JA,k,M) {rsb_coo_idx_t i=(RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,M)); rsb_coo_idx_t j=(RSB_GET_BLOCK_COL_FOR_NZ(JA+k,M)); RSB_BITMAP_GET((M)->options->bitmap,(M)->M_b,(M)->K_b,i,j);}
352 
353 /*
354  * Macros for one dimensional bitmaps -- easier to use.
355  * */
356 #ifdef RSB_BITMAP_ROW_MAJOR_ORDER
357 #define RSB_BITVECTOR_GET(p,bits,bit)     RSB_BITMAP_GET(p,bits,1,bit,0)
358 #define RSB_BITVECTOR_SET(p,bits,bit)     RSB_BITMAP_SET(p,bits,1,bit,0)
359 #define RSB_BITVECTOR_UNSET(p,bits,bit)   RSB_BITMAP_UNSET(p,bits,1,bit,0)
360 #define RSB_BITVECTOR_CLEAR(p,bits)       RSB_BITMAP_CLEAR(p,bits,1)
361 #define RSB_BYTES_PER_BITVECTOR(bits)     RSB_BYTES_PER_BITMAP(bits,1)
362 #define RSB_WORDS_PER_BITVECTOR(bits)     RSB_WORDS_PER_BITMAP(bits,1)
363 #else /* RSB_BITMAP_ROW_MAJOR_ORDER */
364 #define RSB_BITVECTOR_GET(p,bits,bit)     RSB_BITMAP_GET(p,1,bits,0,bit)
365 #define RSB_BITVECTOR_SET(p,bits,bit)     RSB_BITMAP_SET(p,1,bits,0,bit)
366 #define RSB_BITVECTOR_UNSET(p,bits,bit)   RSB_BITMAP_UNSET(p,1,bits,0,bit)
367 #define RSB_BITVECTOR_CLEAR(p,bits)       RSB_BITMAP_CLEAR(p,1,bits)
368 #define RSB_BYTES_PER_BITVECTOR(bits)     RSB_BYTES_PER_BITMAP(1,bits)
369 #define RSB_WORDS_PER_BITVECTOR(bits)     RSB_WORDS_PER_BITMAP(1,bits)
370 #endif /* RSB_BITMAP_ROW_MAJOR_ORDER */
371 
372 	/* given :
373 	 * the address of the nonzero element column index
374 	 * a rsb_options_t pointer
375 	 * will return the block column variable as it should.
376 	 * note that this macro will work only if o->M_b, o->K_b, o->p_r IA are properly initialized and o->p_r sorted.
377 	 *
378 	 * p.s.: note that the use of this could be avoided with a modest memory allocation...
379 	 * p.s.: in the following, we blindly trust that bsearch won't fail
380 	 * */
381 #define RSB_GET_BLOCK_COL_FOR_NZ_(columnidxp,cpntr,K_b) (((rsb_coo_idx_t*)bsearch((columnidxp),(cpntr),(K_b),sizeof(rsb_coo_idx_t),(rsb__nnz_coord_compar))-((cpntr))))
382 #define RSB_GET_BLOCK_COL_FOR_NZ(columnidxp,M)		(RSB_GET_BLOCK_COL_FOR_NZ_((columnidxp),(M)->cpntr,(M)->K_b))
383 
384 #define RSB_GET_BLOCK_ROW_FOR_NZ_(rowidxp,rpntr,M_b) (((rsb_coo_idx_t*)bsearch((rowidxp)   ,(rpntr),(M_b),sizeof(rsb_coo_idx_t),(rsb__nnz_coord_compar))-((rpntr))))
385 #define RSB_GET_BLOCK_ROW_FOR_NZ(rowidxp   ,M)		(RSB_GET_BLOCK_ROW_FOR_NZ_((rowidxp),(M)->rpntr,(M)->M_b))
386 
387 #define RSB_GET_BLOCK_MAJ_FOR_NZ_(majidxp,Mpntr,Md_b) (((rsb_coo_idx_t*)bsearch((majidxp)   ,(Mpntr),(Md_b),sizeof(rsb_coo_idx_t),(rsb__nnz_coord_compar))-((Mpntr))))
388 #define RSB_GET_BLOCK_MAJ_FOR_NZ(majidxp   ,M)		(RSB_GET_BLOCK_MAJ_FOR_NZ_((majidxp),(M)->Mpntr,(M)->Mdim))
389 
390 #define RSB_GET_BLOCK_MIN_FOR_NZ_(minidxp,mpntr,md_b) (((rsb_coo_idx_t*)bsearch((minidxp)   ,(mpntr),(md_b),sizeof(rsb_coo_idx_t),(rsb__nnz_coord_compar))-((mpntr))))
391 #define RSB_GET_BLOCK_MIN_FOR_NZ(minidxp   ,M)		(RSB_GET_BLOCK_MIN_FOR_NZ_((minidxp),(M)->mpntr,(M)->mdim))
392 
393 #define GET_BLOCK_FIRST_COLUMN(column,M)	((M)->cpntr[(column)])
394 #define GET_BLOCK_FIRST_ROW(row,M)		((M)->rpntr[ (row)  ])
395 
396 #define GET_BLOCK_WIDTH(column,M)		(((M)->cpntr[(column)+1])-((M)->cpntr[(column)]))
397 #define GET_BLOCK_HEIGHT(row,M)			(((M)->rpntr[ (row)  +1])-((M)->rpntr[ (row)  ]))
398 
399 #define GET_BLOCK_SIZE(row,column,M)	((GET_BLOCK_WIDTH((column),(M)))*(GET_BLOCK_HEIGHT((row),(M))))
400 
401 /*
402  * Blanks a whole matrix block.
403  * */
404 #define RSB_BLANK_BLOCK(BP,M,BLOCKROW,BLOCKCOLUMN)				\
405 	RSB_BZERO( ((rsb_byte_t*)(BP)),						\
406 		( (M)->el_size * GET_BLOCK_SIZE((BLOCKROW),(BLOCKCOLUMN),(M)) ) );
407 
408 #define RSB_INTRA_BLOCK_ROW(row,blockrow,M) ((row) - (M)->rpntr[(blockrow)])
409 #define RSB_INTRA_BLOCK_COLUMN(column,blockcolumn,M) ((column) - (M)->rpntr[(blockcolumn)])
410 #define RSB_GET_INTRA_BLOCK_OFFSET_ROW_MAJOR(row,column,blockrow,blockcolumn,M) ((( (row) - (M)->rpntr[(blockrow)]) * (GET_BLOCK_WIDTH((blockcolumn),(M))) + ( (column) - (M)->cpntr[(blockcolumn)] )) * (M)->el_size)
411 #define RSB_GET_INTRA_BLOCK_OFFSET_COLUMN_MAJOR(row,column,blockrow,blockcolumn,M) ((( (column) - (M)->cpntr[(blockcolumn)]) * (GET_BLOCK_HEIGHT((blockrow,(M))) + ( (row) - (M)->rpntr[(blockrow)] )) * (M)->el_size)
412 
413 #define RSB_GET_INTRA_BLOCK_ROW_STRIDE(blockrow,blockcolumn,M) (GET_BLOCK_WIDTH((blockcolumn),(M)))
414 #define RSB_GET_INTRA_BLOCK_OFFSET(row,column,blockrow,blockcolumn,M) \
415 	(RSB_GET_INTRA_BLOCK_OFFSET_ROW_MAJOR(row,column,blockrow,blockcolumn,M))
416 #define RSB_GET_INTRA_BLOCK_OFFSET_TRANSPOSED(row,column,blockrow,blockcolumn,M) \
417 	(RSB_GET_INTRA_BLOCK_OFFSET_COLUMN_MAJOR(row,column,blockrow,blockcolumn,M))
418 
419 /*!
420  * Macros for diagonal-related comparisons.
421  *
422  * \code
423  *  (ROW,COL)      (ROW,COL+COLS)
424  *     +--------------+
425  *     |              |
426  *     |              |
427  *     |              |
428  *    ...            ...
429  *     |              |
430  *     |              |
431  *     +--------------+
432  *  (ROW+ROWS,COL)      (ROW+ROWS,COL+COLS)
433  *
434  *
435  * 	under diagonal  	        over diagonal
436  * 	intersects first at row COL     intersects first at row ROW
437  * 	intersects last at row ROW+ROWS     intersects last at row COL+COLS
438  *
439  *	+---------------+       +-\-------------+
440  *     \|               |       |  \            |
441  *      \               |       |   \           |
442  *      |\              |       |    \          |
443  *     ...             ...     ...             ...
444  *      |               |       |              \|
445  *      |               |       |               \
446  *      +-----\---------+	+---------------+
447  * \endcode
448  * */
449 
450 #define RSB_POINT_QUASI_UNDER_DIAGONAL(ROW,COL) 	((ROW)>=(COL))
451 #define RSB_POINT_QUASI_OVER_DIAGONAL(ROW,COL)	 	((ROW)<=(COL))
452 #define RSB_POINT_UNDER_DIAGONAL(ROW,COL) 		((ROW)> (COL))
453 #define RSB_POINT_OVER_DIAGONAL(ROW,COL) 		((ROW)< (COL))
454 
455 #define RSB_POINT_UNDER_SUPRA_DIAGONAL(ROW,COL,OFFSET) 	  (RSB_POINT_UNDER_DIAGONAL(((ROW)+(OFFSET)),(COL)))
456 #define RSB_POINT_UNDER_SUB_DIAGONAL(ROW,COL,OFFSET) 	  (RSB_POINT_UNDER_DIAGONAL(((ROW)),((COL)+(OFFSET))))
457 #define RSB_POINT_OVER_SUPRA_DIAGONAL(ROW,COL,OFFSET) 	  (RSB_POINT_OVER_DIAGONAL(((ROW)+(OFFSET)),(COL)))
458 #define RSB_POINT_OVER_SUB_DIAGONAL(ROW,COL,OFFSET) 	  (RSB_POINT_OVER_DIAGONAL((ROW),((COL)+(OFFSET))))
459 
460 /* assumes COLS>=1, ROWS>=1 */
461 #define RSB_BLOCK_CROSSED_BY_DIAGONAL(ROW,COL,ROWS,COLS)	\
462 	(							\
463 	RSB_POINT_QUASI_UNDER_DIAGONAL((ROW)+(ROWS-1),(COL)) && 	\
464 	RSB_POINT_QUASI_OVER_DIAGONAL((ROW),((COL)+(COLS-1))) )
465 #define RSB_BLOCK_CROSSED_BY_SUPRA_DIAGONAL(ROW,COL,ROWS,COLS,OFFSET)	\
466 	RSB_BLOCK_CROSSED_BY_DIAGONAL((ROW)+(OFFSET),(COL),(ROWS),(COLS))
467 #define RSB_BLOCK_CROSSED_BY_SUB_DIAGONAL(ROW,COL,ROWS,COLS,OFFSET)	\
468 	RSB_BLOCK_CROSSED_BY_SUPRA_DIAGONAL(COL,ROW,COLS,ROWS,OFFSET)
469 #define RSB_BLOCK_CROSSED_BY_SUPRA_OR_SUB_DIAGONAL(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET)	\
470 	(										\
471 	(LOFFSET)>(UOFFSET)?								\
472 	(RSB_BLOCK_CROSSED_BY_SUB_DIAGONAL(ROW,COL,ROWS,COLS,LOFFSET)):			\
473 	(RSB_BLOCK_CROSSED_BY_SUPRA_DIAGONAL(ROW,COL,ROWS,COLS,UOFFSET))	)
474 
475 /*
476  * The offset in the block to the first element which is on the diagonal
477  * The stride will be ROWS+1 or COLS+1, depending on the internal storage.
478  * We here assume C storage.
479  * */
480 #define RSB_BLOCK_DIAGONAL_OFFSET(ROW,COL,ROWS,COLS)	\
481 	((RSB_POINT_UNDER_DIAGONAL((ROW),(COL)))  ? ((ROW)-(COL)) : (((COL)-(ROW))*(COLS)) )
482 /* if the block is internally stored in Fortran, then : */
483 #define RSB_BLOCK_DIAGONAL_OFFSET_FORTRAN_STORED(ROW,COL,ROWS,COLS)	\
484 	((RSB_POINT_OVER_DIAGONAL((ROW),(COL)))  ? (((COL)-(ROW))*(ROWS)) : ((ROW)-(COL))  )
485 
486 /* The following is ordering-neutral.
487 
488    +------------------->
489    | \   +-----+ ^^
490    |   \ |     | ||
491    |     \     | |v RSB_BLOCK_DIAGONAL_OFFSET_FIRST_ROW
492    |     +-\---+ v  RSB_BLOCK_DIAGONAL_OFFSET_LAST_ROW
493    |<--->    \      RSB_BLOCK_DIAGONAL_OFFSET
494    |           \
495    v
496 
497    +------------------->
498    |    \        ^^
499    |      \      ||
500    |     +--\--+ v| RSB_BLOCK_SUPRA_DIAGONAL_OFFSET_FIRST_ROW
501    |     |    \|  v RSB_BLOCK_SUPRA_DIAGONAL_OFFSET_LAST_ROW
502    |     |     |\
503    |     +-----+  \
504    |
505    v
506 
507    +------------------->
508    |             ^^
509    |             ||
510    | \   +-----+ ||
511    |   \ |     | ||
512    |     \     | |v RSB_BLOCK_SUB_DIAGONAL_OFFSET_FIRST_ROW
513    |     +-\---+ v  RSB_BLOCK_SUB_DIAGONAL_OFFSET_LAST_ROW
514    |         \
515    |           \
516    v
517  */
518 #define RSB_BLOCK_DIAGONAL_OFFSET_FIRST_ROW(ROW,COL,ROWS,COLS)	\
519 	((RSB_POINT_UNDER_DIAGONAL((ROW),(COL)))  ? (ROW) :  (COL) )
520 
521 #define RSB_BLOCK_DIAGONAL_OFFSET_LAST_ROW(ROW,COL,ROWS,COLS)	\
522 	((RSB_POINT_UNDER_DIAGONAL((((ROW)+(ROWS))-1),(((COL)+(COLS))-1)))  ? (((COL)+(COLS))-1):(((ROW)+(ROWS))-1)  )
523 
524 #define RSB_BLOCK_SUPRA_DIAGONAL_OFFSET_FIRST_ROW(ROW,COL,ROWS,COLS,OFFSET)	\
525 	(RSB_BLOCK_DIAGONAL_OFFSET_FIRST_ROW(((ROW)+(OFFSET)),(COL),ROWS,COLS))
526 
527 /*#define RSB_BLOCK_SUPRA_DIAGONAL_OFFSET_LAST_ROW(ROW,COL,ROWS,COLS,OFFSET)	*/
528 /*	RSB_BLOCK_DIAGONAL_OFFSET_LAST_ROW((ROW)+(OFFSET),COL,ROWS,COLS)*/
529 
530 #define RSB_BLOCK_SUB_DIAGONAL_OFFSET_FIRST_ROW(ROW,COL,ROWS,COLS,OFFSET)	\
531 	(RSB_BLOCK_DIAGONAL_OFFSET_FIRST_ROW(ROW,(COL)+(OFFSET),ROWS,COLS))
532 
533 #define RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_FIRST_ROW(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET)	\
534 	((RSB_POINT_UNDER_DIAGONAL((ROW)+(UOFFSET),(COL)+(LOFFSET)))  ? (ROW) :  (COL)+(LOFFSET)-(UOFFSET) )
535 
536 #define RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_FIRST_COL(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET)	\
537 	RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_FIRST_ROW(COL,ROW,COLS,ROWS,UOFFSET,LOFFSET)
538 
539 #define RSB_BLOCK_SUB_DIAGONAL_OFFSET_LAST_ROW(ROW,COL,ROWS,COLS,OFFSET)	\
540 	RSB_BLOCK_DIAGONAL_OFFSET_LAST_ROW((ROW),(COL)+(OFFSET),ROWS,COLS)
541 
542 #define RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_LAST_ROW(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET)	\
543 	((RSB_POINT_UNDER_DIAGONAL((ROW)+((ROWS)-1)+(UOFFSET),(COL)+((COLS)-1)+(LOFFSET)))  ? (COL)+((COLS)-1)+(LOFFSET)-(UOFFSET) : (ROW)+(ROWS)-1)
544 
545 #define RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_LAST_COL(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET)	\
546 	RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_LAST_ROW(COL,ROW,COLS,ROWS,UOFFSET,LOFFSET)
547 
548 
549 /* pure VBR, with no trailing structs : */
550 
551 /* row major order (default) : */
552 
553 #define	RSB_GET_NEXT_BLOCK_POINTER(BP,M,ROWVAR,COLVAR,BLOCKROWSVAR,BLOCKCOLSVAR,BLOCKROWVAR,BLOCKCOLUMNVAR)	\
554 	/*										\
555 	 * *input*									\
556 	 * M		should be a valid rsb_mtx_t structure pointer		\
557 	 * *output*									\
558 	 * ROWVAR	will be set to the base row    of this block			\
559 	 * COLVAR	will be set to the base column of this block			\
560 	 * BLOCKROWSVAR	will be set to the rows   count of this block			\
561 	 * BLOCKCOLSVAR	will be set to the column count of this block			\
562 	 * BP		 will be set to the current block pointer			\
563 	 * */										\
564 	++_k;										\
565 	if(_k>=(M)->bpntr[*_pi+1])							\
566 	{										\
567 		++*_pi;	/* new blocks row */						\
568 		while( (M)->bpntr[*_pi] == (M)->bpntr[*_pi+1] )		/* skipping empty rows */		\
569 			++*_pi;											\
570 	}													\
571 	*_pj=(M)->bindx[_k]; 						/* the current block column index  */	\
572 	_lastk=_k;												\
573 	(BLOCKROWVAR)=_i;											\
574 	(BLOCKCOLUMNVAR)=_j;											\
575 	(ROWVAR)=(M)->rpntr[_i];					/* _i is the current block row index */	\
576 	(COLVAR)=(M)->cpntr[_j]; 					/* the current block column index  */	\
577 	/*(BLOCKROWSVAR)=(M)->rpntr[_i+1]-(M)->rpntr[_i];*/ 		/* the current block rows    count */	\
578 	/*(BLOCKCOLSVAR)=(M)->cpntr[_j+1]-(M)->cpntr[_j];*/			/* the current block columns count */	\
579 	/*(BP)=(rsb_byte_t*)((M)->VA ) + (M)->el_size * (M)->indptr[_k] ; */						\
580 	(BLOCKROWSVAR)=GET_BLOCK_HEIGHT(_i,(M));	/* the current block rows    count */			\
581 	(BLOCKCOLSVAR)=GET_BLOCK_WIDTH( _j,(M)); 	/* the current block rows    count */			\
582 	(BP)=(rsb_byte_t*)(RSB_BLOCK_ADDRESS((M),_k));										\
583 	;
584 
585 /* row major order : */
586 #define RSB_GET_FIRST_BLOCK_POINTER(BP,M,ROWVAR,COLVAR,BLOCKROWSVAR,BLOCKCOLSVAR,BLOCKROWVAR,BLOCKCOLUMNVAR)	\
587 	rsb_blk_idx_t _i=0,_j=0;										\
588 	rsb_blk_idx_t *_pi=NULL,*_pj=NULL;									\
589 	rsb_nnz_idx_t _k=0,_lastk=0;										\
590 	if((M)->flags&RSB_FLAG_WANT_COLUMN_MAJOR_ORDER){_pi=&_j;_pj=&_i;}else{_pi=&_i;_pj=&_j;} 		\
591 	while( (M)->bpntr[*_pi] == (M)->bpntr[*_pi+1] )								\
592 		++*_pi;												\
593 	_k=(M)->bpntr[*_pi]; 		/* _k is the first block index for the current row of blocks */		\
594 	*_pj=(M)->bindx[_k]; 						/* the current block column index  */	\
595 	(BLOCKROWVAR)=_i;											\
596 	(BLOCKCOLUMNVAR)=_j;											\
597 	(ROWVAR)=(M)->rpntr[_i];					/* _i is the current block row index */	\
598 	(COLVAR)=(M)->cpntr[_j]; 					/* the current block column index  */	\
599 	(BLOCKROWSVAR)=GET_BLOCK_HEIGHT(_i,(M));	/* the current block rows    count */			\
600 	(BLOCKCOLSVAR)=GET_BLOCK_WIDTH( _j,(M)); 	/* the current block rows    count */			\
601 	(BP)=(rsb_byte_t*)(RSB_BLOCK_ADDRESS((M),_k));
602 #define RSB_GOT_LAST_BLOCK_POINTER(M)	( _lastk >= (M)->block_count )
603 
604 
605 #define RSB_POINT_IN_BOX(R0,C0,RH,CW,R,C)	( ((R)>=(R0)) && (R)<((R0)+(RH)) && ((C)>=(C0)) && (C)<((C0)+(CW)) )
606 #define RSB_MATRIX_CONTAINS(M,R,C) 		( RSB_POINT_IN_BOX((M)->roff,(M)->coff,(M)->nr,(M)->nc,R,C) )
607 #define RSB_SUBMATRIX_CONTAINS_ROW(M,R) 		( RSB_POINT_IN_BOX((M)->roff,0,(M)->nr,0,R,1) )
608 #define RSB_SUBMATRIX_INTERSECTS_COLS(M,C0,C1) 							\
609 	   ( ( (M)->coff <= (C1) ) && ( (M)->coff+(M)->nc > (C0) ) )
610 #define RSB_SUBMATRIX_INTERSECTS_ROWS(M,R0,R1) 							\
611 	   ( ( (M)->roff <= (R1) ) && ( (M)->roff+(M)->nr > (R0) ) )
612 
613 #define RSB_SUBMATRIX_INTERSECTS_BOX(M,R0,R1,C0,C1) 			\
614 	(RSB_SUBMATRIX_INTERSECTS_ROWS(M,R0,R1)&&RSB_SUBMATRIX_INTERSECTS_COLS(M,C0,C1))
615 
616 #define RSB_FIND_SUBMATRIX_CONTAINING(M,R,C)	( \
617 	((M)->sm[0]&&RSB_MATRIX_CONTAINS((M)->sm[0],R,C)?(M)->sm[0]: \
618 	((M)->sm[1]&&RSB_MATRIX_CONTAINS((M)->sm[1],R,C)?(M)->sm[1]: \
619 	((M)->sm[2]&&RSB_MATRIX_CONTAINS((M)->sm[2],R,C)?(M)->sm[2]: \
620 	((M)->sm[3]&&RSB_MATRIX_CONTAINS((M)->sm[3],R,C)?(M)->sm[3]:NULL )))))
621 
622 #define RSB_SUBMATRIX_INDEX(M,I,J) (M->sm[(I)*2+(J)])
623 /*
624  * this should be fixed. we would prefer to use intrinsics here. TODO
625  * */
626 #define RSB_FABS(x) ((x)<(0)?(-x):(x))
627 
628 /*!
629  * Misc macros.
630  */
631 #define RSB_ASSIGN_IF_ZERO(VAR,VAL) if( (VAR) == 0) (VAR) = (VAL);
632 #define RSB_SWAP(TYPE,X,Y) {TYPE __tmp=(X);(X)=(Y);(Y)=(__tmp);}
633 
634 #define RSB_SUBMATRIX_FOREACH_(matrix,submatrix,smi,smj,smk) 					\
635 	/*int smk;*/										\
636 	for(smk=0;smk<4;++smk)									\
637 	if( (smi=smk/2) >=0 && (smj=smk%2) >= 0 && (submatrix=(matrix)->sm[smi*2+(smj)] ) )	\
638  	/* NOTE : handle with care (the 'submatrix' pointer could be NULL) */		\
639 
640 #define RSB_SUBMATRIX_FOREACH_REVERSE(matrix,submatrix,smi,smj) 				\
641 	/*int smi,smj;*/								\
642 	for(smi=1;smi+1>0;--smi)/* fisrt smi, then smj, or will break spmv_uxux, ... */		\
643 	for(smj=1,submatrix=matrix->sm[smi*2+smj];					\
644 		smj+1>0;									\
645 		--smj,submatrix=(smi<2 && smj<2)?matrix->sm[smi*2+(smj)]:NULL)		\
646  	/* NOTE : handle with care (the 'submatrix' pointer could be NULL) */		\
647 
648 #define RSB_SUBMATRIX_FOREACH(MTXAP,submatrix,smi,smj) 				\
649 	/*int smi,smj;*/								\
650 	for(smi=0;smi<2;++smi)/* fisrt smi, then smj, or will break spmv_uxux, ... */		\
651 	for(smj=0,submatrix=MTXAP->sm[smi*2+smj];					\
652 		smj<2;									\
653 		++smj,submatrix=(smi<2 && smj<2)?MTXAP->sm[smi*2+(smj)]:NULL)		\
654  	/* NOTE : handle with care (the 'submatrix' pointer could be NULL) */		\
655 
656 /* The following is incorrect, as it accesses one further pointer. */
657 /*
658 #define RSB_SUBMATRIX_FOREACH_LEAF(MTXAP,submatrix,smi) 				\
659 	for(	(smi)=0,submatrix=(MTXAP)->all_leaf_matrices[(smi)].mtxlp;		\
660 		(smi)<(MTXAP)->all_leaf_matrices_n;					\
661 			++(smi),submatrix=(MTXAP)->all_leaf_matrices[smi].mtxlp)	\
662 */
663 
664 /* The following is correct, even if less elegant because of the bad style of assignment. */
665 #define RSB_SUBMATRIX_FOREACH_LEAF(MTXAP,submatrix,smi) 				\
666 	for(	(smi)=0;		\
667 		((smi)<(MTXAP)->all_leaf_matrices_n) && ( submatrix=(MTXAP)->all_leaf_matrices[(smi)].mtxlp );	\
668 			++(smi))
669 
670 #define RSB_SUBMATRIX_FOREACH_LEAF_PERMUTED(MTXAP,submatrix,smi,PV)				\
671 	for(	(smi)=0;		\
672 		((smi)<(MTXAP)->all_leaf_matrices_n) && ( submatrix=(MTXAP)->all_leaf_matrices[PV[(smi)]].mtxlp );	\
673 			++(smi))
674 
675 #define RSB_SUBMATRIX_IS_ON_DIAG(matrix) 	((matrix)->roff==(matrix)->coff)
676 #define RSB_SUBMATRIX_IS_LOWDIAG(matrix) 	((matrix)->roff>(matrix)->coff)
677 #define RSB_SUBMATRIX_IS_UPPDIAG(matrix) 	((matrix)->roff<(matrix)->coff)
678 
679 #define RSB_SUBMATRIX_FOREACH_DIAG_LEAF(matrix,submatrix,smi) 				\
680 	RSB_SUBMATRIX_FOREACH_LEAF(matrix,submatrix,smi) 				\
681 		if(RSB_SUBMATRIX_IS_ON_DIAG(submatrix))
682 
683 #define RSB_SUBMATRIX_FOREACH_LOWDIAG_LEAF(matrix,submatrix,smi)			\
684 	RSB_SUBMATRIX_FOREACH_LEAF(matrix,submatrix,smi) 				\
685 		if(RSB_SUBMATRIX_IS_LOWDIAG(submatrix))
686 
687 #define RSB_SUBMATRIX_FOREACH_UPPDIAG_LEAF(matrix,submatrix,smi) 				\
688 	RSB_SUBMATRIX_FOREACH_LEAF(matrix,submatrix,smi) 				\
689 		if(RSB_SUBMATRIX_IS_UPPDIAG(submatrix))
690 
691 #define RSB_SUBMATRIX_COLS_INTERSECTION_FIRST(matrix,C)					\
692 	   RSB_MAX(((matrix)->coff),(C))
693 
694 #define RSB_SUBMATRIX_COLS_INTERSECTION_LAST(matrix,C)					\
695 	   RSB_MIN(((matrix)->coff+(matrix->nc-1)),(C))	/* FIXME: requires matrix->nc > 0 */
696 
697 #define RSB_SUBMATRIX_ROWS_INTERSECTION_FIRST(matrix,R)					\
698 	   RSB_MAX(((matrix)->roff),(R))
699 
700 #define RSB_SUBMATRIX_ROWS_INTERSECTION_LAST(matrix,R)					\
701 	   RSB_MIN(((matrix)->roff+(matrix->nr-1)),(R))	/* FIXME: requires matrix->nr > 0 */
702 
703 #define RSB_BCSS_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bri,bci,blockindex,baserow,basecolumn,BR,BC)	\
704 	RSB_DEBUG_ASSERT((matrix)->VA);									\
705 	RSB_DEBUG_ASSERT((matrix)->el_size>0);								\
706 	RSB_DEBUG_ASSERT((matrix)->br>0 && (matrix)->bc>0);							\
707 	blockpointer=(matrix)->VA;									\
708 	for(	bri=0,											\
709 		baserow=(bri)*(BR);									\
710 		bri<(matrix)->Mdim;									\
711 		++bri,											\
712 		baserow=(bri)*(BR)									\
713 		)											\
714 	for(	bi=(matrix)->bpntr[bri],									\
715 		bci=(matrix)->bindx[bi],									\
716 		basecolumn=(bci)*(BC);								\
717 		bi<(matrix)->bpntr[(bri)+1];								\
718 		++bi,											\
719 		blockpointer=((rsb_byte_t*)blockpointer)+(matrix)->el_size*(BR)*(BC),		\
720 		bci=(matrix)->bindx[bi],									\
721 		baserow=(bri)*(BR),									\
722 		basecolumn=(bci)*(BC)								\
723 		)
724 
725 #define RSB_BCSR_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bri,bci,blockindex,baserow,basecolumn)	\
726 	RSB_BCSS_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bri,bci,blockindex,baserow,basecolumn,matrix->br,matrix->bc)
727 
728 #define RSB_BCSC_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bri,bci,blockindex,baserow,basecolumn)	\
729 	RSB_BCSS_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bci,bri,blockindex,basecolumn,baserow,matrix->bc,matrix->br)
730 
731 #define RSB_CONST_ENOUGH_BYTES_FOR_ANY_TYPE 32 /** should adapt this in case of need */
732 #define RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE (RSB_CONST_ENOUGH_BYTES_FOR_ANY_TYPE/sizeof(rsb_aligned_t))	/** should adapt this in case of need */
733 
734 
735 #define RSB_INTERNAL_FLAG_CSR_SORTING_MASK (RSB_FLAG_QUAD_PARTITIONING | RSB_FLAG_OBSOLETE_BLOCK_ASYMMETRIC_Z_SORTING)
736 
737 #define RSB_DO_FLAGS_EXTRACT_STORAGE(F)	 ( \
738 		/*((F) & RSB_FLAG_WANT_LINKED_STORAGE) */ 0 | \
739 		((F) & RSB_FLAG_WANT_COO_STORAGE) | \
740 		((F) & RSB_FLAG_WANT_FIXED_BLOCKING_VBR) | \
741 		((F) & RSB_FLAG_WANT_BCSS_STORAGE) | \
742 		RSB_FLAG_NOFLAGS )
743 
744 #define RSB__FLAG_HAS_UNSPECIFIED_TRIANGLE(FLAGS) (RSB_DO_FLAG_HAS((FLAGS),RSB_FLAG_TRIANGULAR) && !RSB_DO_FLAG_HAS_INTERSECTION((FLAGS),RSB_FLAG_LOWER|RSB_FLAG_UPPER))
745 
746 #if 1
747 #define rsb_do_spmv(TRANSA,ALPHAP,MTXAP,XP,INCX,BETAP,YP,INCY)	\
748        	rsb__do_spmv_general(TRANSA,ALPHAP,MTXAP,XP,INCX,BETAP,YP,INCY,(RSB_OP_FLAG_DEFAULT) RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS)
749 #else
rsb_do_spmv(rsb_trans_t transA,const void * alphap,const struct rsb_mtx_t * mtxAp,const void * Xp,rsb_coo_idx_t incX,const void * betap,void * Yp,rsb_coo_idx_t incY)750 rsb_err_t rsb_do_spmv(rsb_trans_t transA, const void *alphap, const struct rsb_mtx_t * mtxAp, const void * Xp, rsb_coo_idx_t incX, const void * betap, void * Yp, rsb_coo_idx_t incY)
751 {
752 	rsb_err_t errval = rsb__do_spmv_general(transA,alphap,mtxAp,Xp,incX,betap,Yp,incY,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
753 	return errval;
754 }
755 #endif
756 
757 /* We may use custom memcpy functions. */
758 #define RSB_MEMCPY(DST,SRC,BYTES) rsb__memcpy((DST),(SRC),(BYTES))
759 
760 #define RSB_A_BZERO(ID,DOFF,NNZ,ES) \
761 	RSB_BZERO(((rsb_byte_t*)(ID))+(ES)*(DOFF),(ES)*(NNZ)) \
762 
763 #define RSB_A_MEMCPY(ID,IS,DOFF,SOFF,NNZ,ES) \
764 	RSB_MEMCPY(((rsb_char_t*)(ID))+(ES)*(DOFF),((const rsb_char_t*)(IS))+(ES)*(SOFF),(ES)*(NNZ)) \
765 
766 #define RSB_A_MEMMOVE(ID,IS,DOFF,SOFF,NNZ,ES) \
767 	RSB_MEMMOVE(((rsb_char_t*)(ID))+(ES)*(DOFF),((const rsb_char_t*)(IS))+(ES)*(SOFF),(ES)*(NNZ)) \
768 
769 #define RSB_COA_MEMCPY(ID,IS,DOFF,SOFF,NNZ) \
770 	RSB_MEMCPY(((rsb_coo_idx_t*)(ID))+(DOFF),((const rsb_coo_idx_t*)(IS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ)) \
771 
772 #define RSB_COA_MEMCPY2H(ID,IS,DOFF,SOFF,NNZ,ADD) 					\
773 {											\
774 	rsb_nnz_idx_t RSB_DUMMY_ID=0;							\
775 	for(RSB_DUMMY_ID=0;RSB_DUMMY_ID<(NNZ);++RSB_DUMMY_ID)				\
776 		((rsb_half_idx_t*)(ID))[(DOFF)+(RSB_DUMMY_ID)]=			\
777 		((const rsb_coo_idx_t*)IS)[(SOFF)+(RSB_DUMMY_ID)]+(ADD);		\
778 }
779 
780 #define RSB_COA_MEMMOVE(ID,IS,DOFF,SOFF,NNZ) \
781 	RSB_MEMMOVE(((rsb_coo_idx_t*)(ID))+(DOFF),((const rsb_coo_idx_t*)(IS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ)) \
782 
783 #define RSB_COO_MEMMOVE(VD,ID,JD,VS,IS,JS,DOFF,SOFF,NNZ,ES) \
784 	RSB_MEMMOVE(((rsb_char_t*)(VD))+(ES)*(DOFF),((const rsb_char_t*)(VS))+(ES)*(SOFF),(ES)*(NNZ)), \
785 	RSB_MEMMOVE(((rsb_coo_idx_t*)(ID))+(DOFF),((const rsb_coo_idx_t*)(IS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ)), \
786 	RSB_MEMMOVE(((rsb_coo_idx_t*)(JD))+(DOFF),((const rsb_coo_idx_t*)(JS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ))
787 
788 #define RSB_COO_MEMCPY(VD,ID,JD,VS,IS,JS,DOFF,SOFF,NNZ,ES) \
789 	RSB_MEMCPY(((rsb_char_t*)(VD))+(ES)*(DOFF),((const rsb_char_t*)(VS))+(ES)*(SOFF),(ES)*(NNZ)), \
790 	RSB_MEMCPY(((rsb_coo_idx_t*)(ID))+(DOFF),((const rsb_coo_idx_t*)(IS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ)), \
791 	RSB_MEMCPY(((rsb_coo_idx_t*)(JD))+(DOFF),((const rsb_coo_idx_t*)(JS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ))
792 
793 #define RSB_CSR_MEMCPY(VD,ID,JD,VS,IS,JS,NNZ,NR,ES) \
794 	RSB_MEMCPY(((rsb_char_t   *)(VD)),((const rsb_char_t   *)(VS)),(ES)*(NNZ)), \
795 	RSB_MEMCPY(((rsb_nnz_idx_t*)(ID)),((const rsb_nnz_idx_t*)(IS)),sizeof(rsb_nnz_idx_t)*(NR)), \
796 	RSB_MEMCPY(((rsb_coo_idx_t*)(JD)),((const rsb_coo_idx_t*)(JS)),sizeof(rsb_coo_idx_t)*(NNZ))
797 
798 #define RSB_CSR2COO_MEMCPY(VD,ID,JD,VS,I,JS,DOFF,SOFF,NNZ,ES) \
799 	RSB_MEMCPY(((rsb_char_t*)(VD))+(ES)*(DOFF),((const rsb_char_t*)(VS))+(ES)*(SOFF),(ES)*(NNZ)), \
800 	rsb__util_coo_array_set(((rsb_coo_idx_t*)(ID))+(DOFF),(NNZ),(I)), \
801 	RSB_MEMCPY(((rsb_coo_idx_t*)(JD))+(DOFF),((const rsb_coo_idx_t*)(JS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ))
802 
803 #define RSB_COO_MEMCPY_parallel(VD,ID,JD,VS,IS,JS,DOFF,SOFF,NNZ,ES) \
804 	RSB_A_MEMCPY_parallel(VD,VS,DOFF,SOFF,NNZ,ES), \
805 	RSB_COA_MEMCPY_parallel(ID,IS,DOFF,SOFF,NNZ), \
806 	RSB_COA_MEMCPY_parallel(JD,JS,DOFF,SOFF,NNZ)
807 
808 #define RSB_FCOO_ASUM(S,X,LI,UI) {rsb_coo_idx_t i; for(i=(LI);i<(UI);++i)(S)+=(X)[i];}
809 #define RSB_XCOO_ISET(X,  LI,UI) {rsb_coo_idx_t i; for(i=(LI);i<(UI);++i)(X)[i]=i-(LI);}
810 #define RSB_FCOO_ISET RSB_XCOO_ISET
811 #define RSB_XCOO_VSET(X,V,LI,UI) {rsb_coo_idx_t i; for(i=(LI);RSB_LIKELY((i)<(UI));++i)(X)[(i)] =(V);}
812 #define RSB_XCOO_VADD(X,V,LI,UI) {rsb_coo_idx_t i; for(i=(LI);RSB_LIKELY((i)<(UI));++i)(X)[(i)]+=(V);}
813 #define RSB_XCOO_IREN	/* TODO: to write one */
814 
815 #define RSB_NNZ_OF(MTXAP) ((MTXAP)?((MTXAP)->nnz):0)
816 #define RSB_TYPED_OFF_PTR(TYPECODE,VA,OFF) (((rsb_byte_t*)(VA))+(((size_t)(RSB_SIZEOF(TYPECODE))*(OFF))))
817 #define RSB_COO_LT(I1,J1,I2,J2) ( (I1) < (I2) || ( (I1) == (I2) && ( (J1) < (J2) ) ) )
818 #define RSB_COO_GT(I1,J1,I2,J2) RSB_COO_LT(I2,J2,I1,J1)
819 #define RSB_COO_GE(I1,J1,I2,J2) ( !RSB_COO_LT(I1,J1,I2,J2) )
820 #define RSB_COO_EQ(I1,J1,I2,J2) ( (I1) == (I2) && ( (J1) == (J2) ) )
821 
822 /*!
823  * \ingroup gr_internals
824  * \brief An internal, helper structure.
825  */
826 struct rsb_memory_level_t
827 {
828 	size_t size;				/*  */
829 	size_t level;				/*  */
830 	size_t associativity;			/*  */
831 	size_t linesize;			/*  */
832 };
833 
834 #define RSB_MEGABYTE (1024*1024)
835 #define RSB_MEGABYTE_SYM "MiB"
836 
837 #define RSB_DEFAULT_STREAM stdout
838 #define RSB_DIR_SEPARATOR	'/'	/*  */
839 #define RSB_MAX_STRERRLEN  	128	/*  */
840 #define RSB_MAX_LINE_LENGTH  	1025	/*  */
841 #define RSB_MAX_COMPILE_COMMAND_LENGTH 	1025	/*  */
842 #define RSB_MAX_VERSION_STRING_LENGTH  	4096	/*  */
843 #define RSB_MAX_FILENAME_LENGTH  RSB_MAX_LINE_LENGTH	/* the maximal supported file name length (in buffers) */
844 
845 #define RSB_MAX_SUPPORTED_CACHE_LEVELS 32L	/* the maximal supported height of memory hierarchy */
846 #define RSB_MIN_THREAD_MEMCPY_NNZ 1024		/* minimal count of nonzeros to move for a thread during parallel memcpy */
847 #define RSB_MIN_THREAD_BZERO_BYTES 8192		/* minimal count of nonzeros to bzero for a thread during parallel bzero */
848 #define RSB_MIN_THREAD_XAXPY_NNZ 256 /* 1024 */		/* minimal count of elements for a parallel vector-vector operation */
849 #define RSB_MIN_THREAD_SORT_NNZ 256		/* minimal count of nonzeros to sort for a thread during parallel sort */
850 
851 #define RSB_POWER_OF_2(N) (1<<(N))
852 #define RSB_FRAC(Q,D) (((Q)+((D)-1))/(D))
853 #define RSB_MIDDLE(X) RSB_FRAC(X,2)
854 #define RSB_IS_INTEGER_ODD(X)   ( (X)&0x01)
855 #define RSB_IS_INTEGER_EVEN(X)	(!RSB_IS_INTEGER_ODD(X))
856 
857 #define RSB_HAVE_STREAMS RSB_HAVE_STDIO_H
858 
859 #if defined(RSB_WANT_LIBRSB_STATS) && (RSB_WANT_LIBRSB_STATS> 0)
860 #define RSB_WANT_LIBRSB_TIMER 1
861 #else
862 #define RSB_WANT_LIBRSB_TIMER 0
863 #endif
864 
865 
866 /*!
867  * \ingroup gr_internals
868  * \brief An internal, helper structure.
869  */
870 struct rsb_session_handle_t
871 {
872 	#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
873 	/*!
874 	 * A global memory counter, used for debugging purposes.
875 	 * */
876 	size_t allocated_memory;			/* total of allocated memory, in bytes */
877 	size_t allocations_count;		/* total number of current allocations */
878 	#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
879 	size_t min_leaf_matrix_bytes;		/*  */
880 	size_t avg_leaf_matrix_bytes;		/*  */
881 	size_t rsb_g_threads;			/* detected threads */
882 #if RSB_WANT_PERFORMANCE_FILE
883 	/*rsb_byte_t * performance_binary_dump_file;*/	/* TODO: obsolete feature */
884 	rsb_char_t * performance_binary_dump_file;	/* TODO: obsolete feature */
885 #endif /* RSB_WANT_PERFORMANCE_FILE */
886 	/* beginning of user settable variables declarations */
887 	size_t rsb_want_threads;		/* RSB_IO_WANT_EXECUTING_THREADS ; active threads (may be <> rsb_g_threads) */
888 	rsb_int_t asm_sort_method;		/* RSB_IO_WANT_SORT_METHOD */
889 	rsb_real_t subdivision_multiplier;	/* RSB_IO_WANT_SUBDIVISION_MULTIPLIER */
890 	rsb_int_t want_bounded_box;		/* RSB_IO_WANT_BOUNDED_BOX_COMPUTATION */
891 	rsb_int_t cache_blocking_method;	/* RSB_IO_WANT_CACHE_BLOCKING_METHOD */
892 	rsb_int_t want_outer_spmm;		/* RSB_IO_WANT_LEAF_LEVEL_MULTIVEC */
893 #if RSB_HAVE_STREAMS
894 	FILE * out_stream;			/* RSB_IO_WANT_OUTPUT_STREAM */
895 	FILE * error_stream;			/* RSB_IO_WANT_VERBOSE_ERRORS */
896 	FILE * init_stream;			/* RSB_IO_WANT_VERBOSE_INIT */
897 	FILE * exit_stream;			/* RSB_IO_WANT_VERBOSE_EXIT */
898 #endif /* RSB_HAVE_STREAMS */
899 	const rsb_char_t * mhis;		/* RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING ; set via rsb_lib_reinit */
900 #if RSB_WANT_DEBUG_VERBOSE_INTERFACE_NOTICE
901 	rsb_int_t rsb_g_verbose_interface;	/* RSB_IO_WANT_EXTRA_VERBOSE_INTERFACE */
902 #endif /* RSB_WANT_DEBUG_VERBOSE_INTERFACE_NOTICE */
903 	/* end of user settable variables declarations */
904 	long memory_hierarchy_levels;		/*  */
905 	struct rsb_memory_level_t caches[RSB_MAX_SUPPORTED_CACHE_LEVELS];	/* 0,..,memory_hierarchy_levels-1*/
906 	rsb_bool_t rsb_g_initialized;		/*  */
907 #if RSB_WANT_ALLOCATOR_LIMITS
908        	size_t memory_count_max;		/*  */
909 	size_t allocations_count_max;		/*  */
910 #endif /* RSB_WANT_ALLOCATOR_LIMITS */
911 #if RSB_WANT_LIBRSB_TIMER
912 	rsb_time_t etime;
913 #endif /* RSB_WANT_LIBRSB_TIMER */
914 	rsb_int_t verbose_tuning;		/*  */
915 };
916 
917 #define RSB_INTERNALS_COMMON_HEAD_DECLS extern struct rsb_session_handle_t rsb_global_session_handle;
918 #define RSB_DO_ERROR_CUMULATE(ERRVAL,ERRFLAG) RSB_DO_FLAG_ADD((ERRVAL),(ERRFLAG))
919 
920 #define RSB_IF_NOT_NULL_CAST_TO(P,TYPE,FALLBACK) ((P)?*(TYPE*)(P):(FALLBACK))
921 #define RSB_IF_NOT_NULL_SET_TO_CASTED(V,P,TYPE) {if((P)!=NULL){(V)=*(TYPE*)(P);}}
922 #define RSB_IF_NOT_NULL_GET_TO_CASTED(V,P,TYPE) {if((P)!=NULL){*(TYPE*)(P)=(V);}}
923 #define RSB_IF_NOT_NULL_GET_SET_TO_CASTED(V,P,TYPE,F,ERRVAL)	{	\
924 	switch(F){							\
925 		case(RSB_IO_SPECIFIER_GET):				\
926 		RSB_IF_NOT_NULL_GET_TO_CASTED((V),(P),TYPE);break;		\
927 		case(RSB_IO_SPECIFIER_SET):				\
928 		RSB_IF_NOT_NULL_SET_TO_CASTED((V),(P),TYPE);break;	\
929 		default: RSB_DO_ERROR_CUMULATE(ERRVAL,RSB_ERR_BADARGS); }}
930 #define rsb__sprintf sprintf
931 
932 #define RSB_BLAS_ERROR		-1	/* */
933 #define RSB_BLAS_NO_ERROR 	0	/* */
934 #define RSB_BLAS_ERROR_UNSUPPORTED   RSB_BLAS_ERROR			/* TODO: spread usage of this throughout the code */
935 #define RSB_BLAS_ERROR_UNIMPLEMENTED RSB_BLAS_ERROR			/* TODO: spread usage of this throughout the code */
936 #define RSB_BLAS_ERROR_WRONG_USGP_ARG RSB_BLAS_ERROR			/* TODO: spread usage of this throughout the code */
937 
938 #define RSB_SET_IF_NOT_NULL(P,V) if((P)!=NULL)*(P)=V
939 typedef int rsb_blas_int_t;
940 typedef double rsb_aligned_t;	/* see RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE and RSB_CONST_ENOUGH_BYTES_FOR_ANY_TYPE */
941 
942 
943 #define RSB_MASK_OUT_SOME_ERRORS(ERRVAL) {if((ERRVAL)==RSB_ERR_UNSUPPORTED_FEATURE)(ERRVAL)=RSB_ERR_NO_ERROR;}/* NOTE; this is a macro only used to prevent the test suite to complain for failing printouts when output is disabled! */
944 
945 /*!
946  Macros to get indices types liminal values, configuration-dependent.
947 */
948  #define RSB_COO_HALF_BITS_SIZE	((sizeof(rsb_coo_idx_t)*RSB_CHAR_BIT)/2)
949 
950 #define RSB_NULL_STATEMENT_FOR_COMPILER_HAPPINESS {int ___foo=1;++___foo;}/* will avoid things like error: label at end of compound statement */
951 
952 #if RSB_WANT_ZLIB_SUPPORT
953 #define RSB_FOPEN(X,Y) gzopen((X),(Y))
954 #define RSB_FCLOSE(X) gzclose(X)
955 #else /* RSB_WANT_ZLIB_SUPPORT */
956 #define RSB_FOPEN(X,Y) fopen((X),(Y))
957 #define RSB_FCLOSE(X) fclose(X)
958 #endif /* RSB_WANT_ZLIB_SUPPORT */
959 
960 #define RSB_EMPTY_FILE_FILLER static int foo(void){return 0;}
961 
962 #define RSB_DECLARE_COO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX,TYPE) 	\
963 		TYPE *IA=(TYPE*)(MATRIX)->bpntr;			\
964 		TYPE *JA=(TYPE*)(MATRIX)->bindx;
965 
966 #define RSB_DECLARE_COO_IARRAY_FROM_MATRIX(IA,MATRIX,TYPE) 	\
967 		TYPE *IA=(TYPE*)(MATRIX)->bpntr;
968 
969 #define RSB_DECLARE_COO_JARRAY_FROM_MATRIX(JA,MATRIX,TYPE) 	\
970 		TYPE *JA=(TYPE*)(MATRIX)->bindx;
971 
972 #define RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,PTYPE,ITYPE) 	\
973 		PTYPE *PA=(PTYPE*)(MATRIX)->bpntr;			\
974 		ITYPE *JA=(ITYPE*)(MATRIX)->bindx;
975 
976 #define RSB_DECLARE_CONST_HALFCSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX) 	\
977 	RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,const rsb_nnz_idx_t,const rsb_half_idx_t)
978 
979 #define RSB_DECLARE_HALFCSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX) 	\
980 	RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,rsb_nnz_idx_t,rsb_half_idx_t)
981 
982 #define RSB_DECLARE_CONST_FULLCSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX) 	\
983 	RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,const rsb_nnz_idx_t,const rsb_coo_idx_t)
984 
985 #define RSB_DECLARE_FULLCSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX) 	\
986 	RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,rsb_nnz_idx_t,rsb_coo_idx_t)
987 
988 #define RSB_DECLARE_CONST_HALFCOO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX) 	\
989 	RSB_DECLARE_COO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX,const rsb_half_idx_t)
990 
991 #define RSB_DECLARE_CONST_HALFCOO_IARRAY_FROM_MATRIX(IA,MATRIX) 	\
992 	RSB_DECLARE_COO_IARRAY_FROM_MATRIX(IA,MATRIX,const rsb_half_idx_t)
993 
994 #define RSB_DECLARE_CONST_HALFCOO_JARRAY_FROM_MATRIX(JA,MATRIX) 	\
995 	RSB_DECLARE_COO_JARRAY_FROM_MATRIX(JA,MATRIX,const rsb_half_idx_t)
996 
997 #define RSB_DECLARE_CONST_FULLCOO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX) 	\
998 	RSB_DECLARE_COO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX,const rsb_coo_idx_t)
999 
1000 #define RSB_DECLARE_CONST_FULLCOO_IARRAY_FROM_MATRIX(IA,MATRIX) 	\
1001 	RSB_DECLARE_COO_IARRAY_FROM_MATRIX(IA,MATRIX,const rsb_coo_idx_t)
1002 
1003 #define RSB_DECLARE_CONST_FULLCOO_JARRAY_FROM_MATRIX(JA,MATRIX) 	\
1004 	RSB_DECLARE_COO_JARRAY_FROM_MATRIX(JA,MATRIX,const rsb_coo_idx_t)
1005 
1006 #define RSB_DECLARE_FULLCOO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX) 	\
1007 	RSB_DECLARE_COO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX,rsb_coo_idx_t)
1008 
1009 /*!
1010  * If the restrict keyword is supported, we use it in our declarations.
1011  * */
1012 /* #ifdef restrict */
1013 #ifdef RSB_restrict
1014 #define RSB_RESTRICT restrict
1015 #else /* RSB_restrict */
1016 #define RSB_RESTRICT
1017 #endif /* RSB_restrict */
1018 
1019 #define RSB_VA_OFFSET_POINTER(VA,ES,OFF) 		((rsb_byte_t*)(VA)+(size_t)(ES)*(OFF))
1020 #define RSB_VA_OFFSET_POINTER_CONST(VA,ES,OFF) 		((const rsb_byte_t*)(VA)+(size_t)(ES)*(OFF))
1021 #define RSB_VA_MEMCMP(LVA,LOFF,RVA,ROFF,ES) 		\
1022 	RSB_MEMCMP(RSB_VA_OFFSET_POINTER((LVA),(ES),(LOFF)),RSB_VA_OFFSET_POINTER((RVA),(ES),(ROFF)),(ES))
1023 
1024 /*!
1025  * \brief Auxiliary structure for a coo-stored matrix (usually for temporary operations).
1026  * */
1027 struct rsb_coo_matrix_t{
1028 	rsb_coo_idx_t * IA, * JA;/** row and columns indices */
1029 	rsb_coo_idx_t nr,nc;	/** matrix (declared) nonzeros */
1030 	rsb_nnz_idx_t nnz;	/** matrix rows, columns */
1031 	void * VA;		/** values of data elements */
1032 	rsb_type_t typecode;	/** as specified in the RSB_NUMERICAL_TYPE_* preprocessor symbols in rsb_types.h 	*/
1033 };
1034 
1035 #define RSB_INIT_COO_FROM_MTX(COOP,MTXAP)	{ \
1036 		(COOP)->nr=(MTXAP)->nr;	\
1037 		(COOP)->nc=(MTXAP)->nc;	\
1038 		(COOP)->nnz=(MTXAP)->nnz;	\
1039 		(COOP)->typecode=(MTXAP)->typecode; }
1040 
1041 #define RSB_INIT_CXX_FROM_MTX(COOP,MTXAP)	{ \
1042 		(COOP)->nr=(MTXAP)->nr;	\
1043 		(COOP)->nc=(MTXAP)->nc;	\
1044 		(COOP)->nnz=RSB_MAX((MTXAP)->nnz,1+RSB_MAX((MTXAP)->nr,(MTXAP)->nc)); \
1045 		(COOP)->typecode=(MTXAP)->typecode; }
1046 
1047 #define RSB_BIND_COO_TO_MTX(COOP,MTXAP)	{ \
1048 		(COOP)->VA=(MTXAP)->VA;	\
1049 		(COOP)->IA=(MTXAP)->bpntr;	\
1050 		(COOP)->JA=(MTXAP)->bindx;	}
1051 
1052 #define RSB_FLAG_SOME_SYMMETRY				(RSB_FLAG_HERMITIAN|RSB_FLAG_SYMMETRIC)
1053 #define RSB_FLAG_ALL_STRUCTURAL_FLAGS	(RSB_FLAG_SOME_SYMMETRY|RSB_FLAG_DIAGONAL|RSB_FLAG_TRIANGULAR|RSB_FLAG_UNIT_DIAG_IMPLICIT)
1054 #define RSB_FLAG_ALL_DUPLICATE_FLAGS	(RSB_FLAG_DUPLICATES_KEEP_LAST|RSB_FLAG_DUPLICATES_SUM)
1055 #define RSB_DUMMY_ID		rsb_dummy_id
1056 #define RSB_DUMMY_MTX		(NULL)
1057 #define RSB_DEFAULT_TEST_MATRIX_FILENAME "pd.mtx"	/**< this file should always be included in the library distribution (FIXME: should enforce this) */
1058 
1059 #define RSB_VECTORS_DIFF_DISPLAY_N 10
1060 #define RSB_VECTORS_DIFF_DISPLAY_N_SMALL 3
1061 #define RSB_DEFAULT_UNDEFINED_COO_VALUE 0
1062 
1063 #define RSB_PSORT_CHUNK 10000			/* FIXME: hardcoded constants are bad (and the PGI compiler won't accept them) */
1064 #define RSB_MINIMUM_VECOP_OMP_CHUNK 1000			/* FIXME: hardcoded constants are bad (and the PGI compiler won't accept them) */
1065 
1066 #define RSB_BOOL_IS_POINTER_NON_NULL(P) ((P)?RSB_BOOL_TRUE:RSB_BOOL_FALSE)
1067 
1068 #define RSB_CONDITIONAL_ERRPSET(ERRVALP,ERRVAL) {if(ERRVALP)(*(ERRVALP)=(ERRVAL));}
1069 #define RSB_MTX_FREE(MTXAP) if(MTXAP){rsb__do_mtx_free(MTXAP);(MTXAP)=NULL;}  /* frees the matrix and nullifies the associated pointer. */
1070 
1071 /* #define RSB_FLAGS_RSB_AGNOSTIC RSB_FLAG_FORTRAN_INDICES_INTERFACE */
1072 /* #define RSB_FLAGS_RSB_NON_AGNOSTIC (RSB_FLAG_USE_HALFWORD_INDICES|RSB_FLAG_WANT_COO_STORAGE|RSB_FLAG_WANT_CSR_STORAGE)  --- see RSB_DO_FLAGS_EXTRACT_STORAGE(flags) */
1073 #define RSB_FLAGS_RSB_AGNOSTIC (RSB_FLAG_FORTRAN_INDICES_INTERFACE|RSB_FLAG_UNIT_DIAG_IMPLICIT|RSB_FLAG_UPPER|RSB_FLAG_LOWER|RSB_FLAG_SORTED_INPUT|RSB_FLAG_TRIANGULAR|RSB_FLAG_SYMMETRIC|RSB_FLAG_HERMITIAN)
1074 
1075 #define RSB_INDEX_FIT_IN_HALFWORD(I) ((I)<=RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t))
1076 #define RSB_INDICES_FIT_IN_HALFWORD(I,J) ( RSB_INDEX_FIT_IN_HALFWORD(I) && RSB_INDEX_FIT_IN_HALFWORD(J) )
1077 
1078 #define RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(V)  						\
1079 	if(RSB_DO_FLAG_FILTEROUT((V),RSB_FLAGS_RSB_AGNOSTIC)==RSB_FLAG_NOFLAGS)	\
1080  		RSB_DO_FLAG_ADD((V),RSB_FLAG_DEFAULT_MATRIX_FLAGS);
1081 
1082 #define RSB_DIVIDE_IN_CHUNKS(N,NTHREADS) RSB_MAX(((N)+(NTHREADS)-1)/(NTHREADS),1)
1083 #define RSB_EXIT exit
1084 #define RSB_DO_ERR_RETURN(ERRVAL) {return (ERRVAL);}
1085 #define RSB_DO_MTX_RETURN(MATRIX,ERRVAL) {return (MATRIX);}
1086 #define RSB_FLAG_UPPTRI (RSB_FLAG_UPPER|RSB_FLAG_LOWER)
1087 #define RSB_DO_FLAG_FLIP_UPLO(V)	{\
1088 if(RSB_DO_FLAG_HAS((V),RSB_FLAG_UPPER)) \
1089 	RSB_DO_FLAG_DEL((V),RSB_FLAG_UPPER),RSB_DO_FLAG_ADD((V),RSB_FLAG_LOWER); \
1090 else \
1091 if(RSB_DO_FLAG_HAS((V),RSB_FLAG_LOWER)) \
1092 	RSB_DO_FLAG_ADD((V),RSB_FLAG_UPPER),RSB_DO_FLAG_DEL((V),RSB_FLAG_LOWER); \
1093 }
1094 #define RSB_PERR_GOTO(LABEL,...) {RSB_ERROR(__VA_ARGS__);goto LABEL;}
1095 #define RSB_SERR_GOTO(LABEL)     {goto LABEL;}
1096 
1097 #define RSB_SYMMETRY_STRING(FLAGS) (RSB_DO_FLAG_HAS(FLAGS,RSB_FLAG_HERMITIAN)?"hermitian":(RSB_DO_FLAG_HAS(FLAGS,RSB_FLAG_SYMMETRIC)?"symmetric":"general"))
1098 
1099 typedef float rsb_float_t;
1100 #define RSB_FLOAT_ONE 1.0f
1101 
1102 #define RSB_RECURSION_MIN_DIM (2)
1103 #define RSB_RECURSION_MIN_NNZ (4)
1104 
1105 /*!
1106  An integer type for thread indices.
1107  */
1108 typedef int rsb_thread_t;
1109 
1110 /*! \internal  */
1111 typedef rsb_flags_t rsb_order_t;
1112 
1113 
1114 
1115 /** \internal \todo:OBSOLETE, REMOVE */
1116 #define RSB_FLAG_OBSOLETE_BLOCK_ASYMMETRIC_Z_SORTING	 	0x008000
1117 
1118 /** \internal \todo:obsolete: FIXME  */
1119 /*#define RSB_FLAG_BLOCK_ASYMMETRIC_Z_SORTED	 	0x010000*/
1120 /** \internal \todo:temporary fix: FIXME  */
1121 #define RSB_FLAG_FIX_FOR_BINARY_LOADED_MATRIX		 	0x010000
1122 
1123 /** \internal \todo:EXPERIMENTAL */
1124 #define RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR	 	0x020000
1125 
1126 /** if set, the matrix will be partitioned with a block size chosen automatically */
1127 #define RSB_FLAG_AUTO_BLOCKING				0x80000000	/* FIXME: obsolete, unsupported */
1128 
1129 
1130 /** if set, will decide between column or row major for each (leaf) matrix (NEW: EXPERIMENTAL) */
1131 /*#define RSB_FLAG_WANT_AUTO_MAJOR_ORDER 			0x200000	*/	/* Unsupported */
1132 
1133 
1134 #if 0
1135 /** if set, the matrix ..  */
1136 #define RSB_FLAG_WANT_RECURSIVELY_NON_UNIFORM_AUTO_BLOCKING 0x000200	/* experimental, but works well */
1137 #endif /* 0 */
1138 
1139 /** if set, the matrix will take possession of partitioning arrays p_r and p_c on input. if unset, a copy will be made	*/
1140 #define RSB_FLAG_OWN_PARTITIONING_ARRAYS		0x000080	/*  */
1141 
1142 /** if set, the blocks will be linked in some way */
1143 /* FIXME: delete this flag */
1144 /*#define RSB_FLAG_WANT_LINKED_STORAGE 			0x000400*/
1145 
1146 /** if set, operating routines will check input more aggressively (may break operation)  */
1147 #define RSB_FLAG_SHOULD_DEBUG 				0x000800
1148 
1149 /** if set, the block partitioning will be fixed but VBR or LBR (Unsupported)	*/
1150 #define RSB_FLAG_WANT_FIXED_BLOCKING_VBR	 	0x001000
1151 
1152 /** if set, will mark a leaf matrix */
1153 #define RSB_FLAG_NON_ROOT_MATRIX	 	0x100000
1154 
1155 /** if set, the matrix will be prevent from being subdivided too much (OUTLAWED) */
1156 /*#define RSB_FLAG_EXPERIMENTAL_NO_MICRO_LEAVES 		0x4000000*/
1157 
1158 /**
1159  * if set, the blocks will cycle column after column.
1160  * if RSB_FLAG_WANT_BCSS_STORAGE is also set, the matrix storage format will be BCSC.
1161  * otherwise it will be VBC.
1162  * */
1163 /* see rsb.h*/
1164 /*#define RSB_FLAG_WANT_COLUMN_MAJOR_ORDER 		0x4000000*/
1165 
1166 
1167 /** if set, the code will sort the input 			*/
1168 #define RSB_FLAG_SORT_INPUT			0x2000000	/* FIXME: delete this flag */
1169 
1170 
1171 /** a parameter to determine if a matrix is really 'small' or not (FIXME) */
1172 #define RSB_EXPERIMENTAL_MIN_LEAF_ELEMENTS 		1024
1173 
1174 /*#define RSB_FLAG_RECURSIVE_SHRINK_BOUNDING_BOX		0x40000000*/
1175 
1176 #if 0
1177 /* only flags left :  */
1178 #define RSB_FLAG_ALLOW_PARALLEL_OPERATION		0x40000000		/* NEW : UNUSED */
1179 #endif /* 0 */
1180 
1181 #if 0
1182 #define RSB_FLAG_DEFAULT		 		(RSB_FLAG_DISCARD_ZEROS  /*| RSB_FLAG_WANT_BCSS_STORAGE*/ /* | RSB_FLAG_SORT_INPUT*/)
1183 #endif /* 0 */
1184 
1185 
1186 /**
1187  * \brief It is an internal structure, so beware, you should not use it.
1188  * \internal
1189  *
1190  * This structure will be used for keeping information about matrix partitioning.
1191  * It should be used primarily during matrix building, when the matrix arrays are
1192  * not all allocated.
1193  *
1194  * It is an internal structure, so beware, you should not use it.
1195  * */
1196 struct rsb_mtx_partitioning_info_t
1197 {
1198 	rsb_blk_idx_t M_b, K_b;		/**< just as in rsb_mtx_t */
1199 	rsb_blk_idx_t br, bc;			/**< block row and column size (only if BCSR) (NEW) */
1200 	rsb_coo_idx_t *rpntr,*cpntr;		/**< just as in rsb_mtx_t */
1201 
1202 	rsb_coo_idx_t nr,nc;			/**< just as in rsb_mtx_t */
1203 	rsb_submatrix_idx_t should_subdivide_levels;		/**< for recursive partitioning (EXPERIMENTAL) */
1204 };
1205 
1206 
1207 typedef signed   long rsb_long_t;		/* FIXME :internals, (still) unused */
1208 
1209 #define	RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT__VAL 0x10
1210 #define RSB_OP_FLAG_WANT_SERIAL__VAL 0x2
1211 #define RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL_VAL (RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT__VAL+RSB_OP_FLAG_WANT_SERIAL__VAL)
1212 /*!
1213  * \ingroup gr_internals
1214  * \brief An internal, helper enumeration.
1215  * \internal
1216  */
1217 enum rsb_op_flags_t { 	RSB_OP_FLAG_DEFAULT=0x1, /* normal operation */
1218        			RSB_OP_FLAG_WANT_SERIAL=RSB_OP_FLAG_WANT_SERIAL__VAL,
1219 		       	RSB_OP_FLAG_MAY_PARALLEL=0x4,
1220 			RSB_OP_FLAG_INFINITE_PARALLELISM_EMULATE=0x5, /* will process only diagonal blocks */
1221 			RSB_OP_FLAG_FAKE_LOCK=0x6, /* will perform operations with no locking (thus giving incorrect results) just to determine lock overhead */
1222        			RSB_OP_FLAG_WANT_PARALLEL_SORT=0x7,
1223        			RSB_OP_FLAG_WANT_SERIAL_SORT=0x8,
1224        			RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT=RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT__VAL,
1225        			RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL=RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL_VAL,
1226        			RSB_OP_FLAG_WANT_TRACE_PLOT=0x10
1227 			};
1228 
1229 #define RSB_BLOCK_ROWMAJOR_ADDRESS(P,LDP,NR,NC,R,C,ES) \
1230 	((rsb_char_t*)P)+((size_t)(ES))*((LDP)*(R)+(C))
1231 #define RSB_BLOCK_COLMAJOR_ADDRESS(P,LDP,NR,NC,R,C,ES) \
1232 	((rsb_char_t*)P)+((size_t)(ES))*((LDP)*(C)+(R))
1233 
1234 #define RSB_BLOCK_X_MAJOR_REFERENCE(A,LDP,R,C,ONEIFISCOLMAJOR) \
1235 	A[(ONEIFISCOLMAJOR)?((LDP)*(C)+(R)):((LDP)*(R)+(C))]
1236 
1237 #define RSB_SOME_ERROR(ERRVAL) ((ERRVAL)!=RSB_ERR_NO_ERROR)
1238 
1239 #define RSB_USE_OMP_SET_NUM_THREADS 0
1240 
1241 #if RSB_USE_OMP_SET_NUM_THREADS
1242 #define rsb_set_num_threads(RNT) omp_set_num_threads(RNT)
1243 #define rsb_get_num_threads()    omp_get_num_threads()
1244 #else
1245 #define rsb_set_num_threads(RNT) rsb__set_num_threads(RNT)
1246 #define rsb_get_num_threads()    rsb__set_num_threads(RSB_THREADS_GET)
1247 #endif
1248 #define RSB_DO_THREADS_PUSH(RNT)	{if((RNT)>0)rsb_set_num_threads(RNT); /* push */}
1249 #define RSB_DO_THREADS_POP(RNT,ORNT)	{if((RNT)>0)rsb_set_num_threads(ORNT); /* pop */}
1250 
1251 #if defined(RSB_WANT_RSB_NUM_THREADS) && (RSB_WANT_RSB_NUM_THREADS>0)
1252 #define RSB_NUM_THREADS_DECL	const char * rnt_str = getenv("RSB_NUM_THREADS"); rsb_int_t ornt = rsb_get_num_threads(), rnt = (rnt_str? rsb__util_atoi(rnt_str) :0);
1253 #define RSB_NUM_THREADS_PUSH	{RSB_DO_THREADS_PUSH(rnt); /* push */}
1254 #define RSB_NUM_THREADS_POP	{RSB_DO_THREADS_POP(rnt,ornt); /* pop */}
1255 #else /* defined(RSB_WANT_RSB_NUM_THREADS) && (RSB_WANT_RSB_NUM_THREADS>0) */
1256 #define RSB_NUM_THREADS_DECL
1257 #define RSB_NUM_THREADS_PUSH
1258 #define RSB_NUM_THREADS_POP
1259 #endif /* defined(RSB_WANT_RSB_NUM_THREADS) && (RSB_WANT_RSB_NUM_THREADS>0) */
1260 
1261 #if defined(RSB_BLAS_WANT_EXPERIMENTAL_TUNING)
1262 #define RSB_SPB_THREADS_PUSH	{RSB_DO_THREADS_PUSH(rnt); /* push */}
1263 #define RSB_SPB_THREADS_POP	{RSB_DO_THREADS_PUSH(rnt,ornt); /* pop */}
1264 #else /* defined(RSB_BLAS_WANT_EXPERIMENTAL_TUNING) */
1265 #define RSB_SPB_THREADS_PUSH
1266 #define RSB_SPB_THREADS_POP
1267 #endif /* defined(RSB_BLAS_WANT_EXPERIMENTAL_TUNING) */
1268 
1269 #define RSB_SPB_THREADS_DEFAULT 0
1270 #define RSB_SPB_THREADS_AUTO -1
1271 #define RSB_SPB_THR_STR_AUTO -2
1272 #define RSB_SPB_THR_STR_AUTO_NEXTOP -3 /* TODO: need to diversify in thr.-only vs str.+thr. tuning */
1273 
1274 /* #define RSB_PRINT_THREAD_STATS RSB_STDOUT("rsb_want_threads / rsb_g_threads / omp_get_max_threads / omp_get_num_threads / omp_get_thread_limit: %d / %d / %d / %d / %d\n",rsb_global_session_handle.rsb_want_threads,rsb_global_session_handle.rsb_g_threads,omp_get_max_threads(),omp_get_num_threads(),omp_get_thread_limit()); */
1275 #define RSB_PRINT_THREAD_STATS RSB_STDOUT("rsb_want_threads / rsb_g_threads / omp_get_max_threads / omp_get_num_threads / omp_get_thread_limit: %d / %d / %d / %d\n",(int)rsb_global_session_handle.rsb_want_threads,(int)rsb_global_session_handle.rsb_g_threads,(int)omp_get_max_threads(),(int)omp_get_num_threads());
1276 
1277 #define RSB_ERRMSG_NOSTREAMS "streams usage configured out."
1278 #define RSB_ERRMSG_BADFORMAT "submatrix format unrecognized."
1279 #define RSB_ERRMSG_NOTMTXMKT "not a Matrix Market format matrix"
1280 #define RSB_ERRMSG_FILEOPENP "problems opening"
1281 #define RSB_ERRMSG_PROIFAMM "problems reading or interpreting file as Matrix Market"
1282 #define RSB_ERRMSG_FILEOPENPGZ "problems opening gzipped"
1283 #define RSB_ERRMSG_TMXMKTBANNER "Could not process Matrix Market banner"
1284 #define RSB_ERRMSG_BADCOO "bad input coo elements"
1285 #define RSB_INFOMSG_SAK "is a swiss army knife for testing the library functionality and performance"
1286 
1287 #define RSB_WANT_COO_BEGIN 1
1288 
1289 #if RSB_WANT_COO_BEGIN
1290 #define RSB_MTX_HBDF(MTXAP) ((MTXAP)->RSB_MTX_BMF==RSB_MTX_BMV)
1291 #define RSB_MTX_HBDFH(MTXAP) ((MTXAP)->RSB_MTX_BDF)
1292 #define RSB_MTX_BDF nnz
1293 #define RSB_MTX_BMF nr
1294 #define RSB_MTX_BMV -1
1295 #endif /* RSB_WANT_COO_BEGIN */
1296 
1297 #define RSB_STDOUT_MATRIX_ESSENTIALS(M,MN,TN) RSB_STDOUT("%s\t%c\t%c\t%d\t%d\t%d\t%d",(const rsb_char_t*)rsb__basename(MN),rsb__do_get_symmetry_char(M),RSB_TRANSPOSITION_AS_CHAR(transA),TN,(M)->nr,(M)->nc,(M)->nnz)
1298 #define RSB_FPRINTF_MATRIX_ESSENTIALS(FD,M,MN,TN) RSB_FPRINTF(FD,"%s\t%c\t%c\t%d\t%d\t%d\t%d",(const rsb_char_t*)rsb__basename(MN),rsb__do_get_symmetry_char(M),RSB_TRANSPOSITION_AS_CHAR(transA),TN,(M)->nr,(M)->nc,(M)->nnz)
1299 #define RSB_FPINV(FPV) (1.0/(FPV))
1300 #define RSB_MILLION_I 1000000
1301 #define RSB_MILLION_F 1000000.0
1302 #define RSB_CLEARTERM_STRING "\x1B\x4D"
1303 /*#define RSB_MAX_SHORTIDX_MATRIX_DIM (RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t)-RSB_NNZ_BLK_MAX)*/
1304 #define RSB_MAX_SHORTIDX_MATRIX_DIM (RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t))
1305 #define RSB_BENCH_PROG_OPTS \
1306 	    {"nthreads",	required_argument, NULL, 0x6E},/* n */
1307 #define RSB_MAX_ALLOCATABLE_MEMORY_CHUNK \
1308 ((size_t)((sizeof(void*)==sizeof(unsigned int))? RSB_MAX_VALUE_FOR_TYPE(unsigned int):RSB_MAX_VALUE_FOR_TYPE(size_t)))
1309 #define RSB_DOES_TRANSPOSE(TRANSA) ((TRANSA)!=RSB_TRANSPOSITION_N)
1310 #define RSB_DOES_NOT_TRANSPOSE(TRANSA) (!RSB_DOES_TRANSPOSE(TRANSA))
1311 #define RSB_DOES_CONJUGATE(TRANSA) ((TRANSA)==RSB_TRANSPOSITION_C)
1312 #define RSB_DOES_NOT_CONJUGATE(TRANSA) (!RSB_DOES_CONJUGATE(TRANSA))
1313 #define RSB_MTX_TRANSPOSED_ROWS(MTX,TRANSA) (RSB_DOES_TRANSPOSE((TRANSA))?(MTX)->nc:(MTX)->nr)
1314 #define RSB_MTX_TRANSPOSED_COLS(MTX,TRANSA) (RSB_DOES_TRANSPOSE((TRANSA))?(MTX)->nr:(MTX)->nc)
1315 #define RSB_MTX_DIAG_SIZE(MTX) RSB_MIN( (MTX)->nc,(MTX)->nr )
1316 #define RSB_MTX_DIAG_SIZE_BLK(MTX)  RSB_MTX_DIAG_SIZE_BLK(MTX) + RSB_NNZ_BLK_MAX
1317 
1318 #define RSB_ALLOW_ZERO_DIM RSB_MIN_MATRIX_DIM == 0
1319 #define RSB_ANY_MTX_DIM_ZERO(MTXAP) ((MTXAP) && (((MTXAP)->nr==0)||(MTXAP)->nc==0))
1320 
1321 #if defined(RSB_WANT_OMP_RECURSIVE_KERNELS) && (RSB_WANT_OMP_RECURSIVE_KERNELS>0)
1322 #define RSB_NT rsb_global_session_handle.rsb_g_threads
1323 #define RSB_NTC num_threads(RSB_NT)
1324 #else
1325 #define RSB_NT
1326 #define RSB_NTC
1327 #endif
1328 #define RSB_STORE_IDXSA 1
1329 
1330 #define RSB_ASSIGN_IF_SP(DSTV,SRCP) 	\
1331 	if ( (SRCP) != NULL )		\
1332 		(DSTV) = *(SRCP);		/* FIXME: move this declaration elsewhere */
1333 
1334 #define RSB_ASSIGN_IF_DP(DSTP,SRCV) 	\
1335 	if ( (DSTP) != NULL )		\
1336 		*(DSTP) = (SRCV);		/* FIXME: move this declaration elsewhere */
1337 
1338 #define RSB_ASSIGN_IF(DSTP,SRCV) RSB_ASSIGN_IF_DP(DSTP,SRCV)
1339 
1340 #ifdef RSB_HAVE_ASSERT_H
1341 #ifdef RSB_USE_ASSERT
1342 /* ok, no extra action needed */
1343 #else /* RSB_USE_ASSERT */
1344 /* according to POSIX.1-2001, C89, C99, NDEBUG will cause assert to generate no code.  */
1345 #define NDEBUG 1
1346 #endif /* RSB_USE_ASSERT */
1347 #include <assert.h>	/* the assert() macro */
1348 #endif /* RSB_HAVE_ASSERT_H */
1349 
1350 #include "rsb.h"		/* public API specification */
1351 #include "rsb_init.h"		/* initialization functions */
1352 #include "rsb_rec.h"		/* recursion handling functions */
1353 #include "rsb_permute.h"	/* permutation functions */
1354 #include "rsb_srt.h"		/* sorting functions */
1355 #include "rsb_mergesort.h"	/* sorting functions */
1356 #include "rsb_merge.h"		/* merging functions */
1357 #include "rsb_srtp.h"		/* parallel sorting functions */
1358 #include "rsb_prec.h"		/* toy preconditioning */
1359 #include "rsb_msort_up.h"	/* sorting functions, adapted from PSBLAS */
1360 #include "rsb_unroll.h"		/* computational kernels */
1361 #include "rsb_is.h"			/* coordinate handling functions */
1362 #include "rsb_src.h"		/* search functions */
1363 #include "rsb_clone.h"		/* clone functions */
1364 #include "rsb_err.h"		/* error handling functions */
1365 #include "rsb_internals.h"		/* */
1366 #include "rsb_do.h"		/* */
1367 #include "rsb_mio.h"			/* I/O functions */
1368 #include "rsb_get.h"		/* matrix getter functions */
1369 #include "rsb_set.h"		/* matrix setter functions */
1370 #include "rsb_dump.h"		/* matrix dumping functions */
1371 #include "rsb_coo.h"		/* coordinate handling functions */
1372 #include "rsb_csr.h"		/* csr handling functions */
1373 #include "rsb_blas_stuff.h"		/* BLAS like stuff */
1374 #include "rsb_op.h"			/* */
1375 #include "rsb_bio.h"		/* Binary Matrix I/O */
1376 #include "rsb_asm.h"		/* Matrix assembly functions */
1377 #include "rsb_coo_check.h"	/* */
1378 #include "rsb_coo_symm.h"		/* */
1379 #include "rsb_idx.h"		/* index manipulation */
1380 /* #include "rsb_ftn.h"*/		/* fortran interface functions (obsolete) */
1381 #include "rsb_libspblas_handle.h"	/*  */
1382 #include "rsb_render.h"		/* matrix as pixmap rendering functions */
1383 #include "rsb_eps.h"		/* matrix as (encapsulated) postscript rendering functions */
1384 #include "rsb_gen.h"		/* matrix generating functions */
1385 #include "rsb_sys.h"		/* system related functions */
1386 #include "rsb_mbw.h"		/* memory benchmark related functions */
1387 #include "rsb_limiter.h"	/*  */
1388 #include "rsb_fpb.h"		/* floating point benchmark related functions */
1389 #include "rsb_garbage.h"		/* misc helpers routines */
1390 #include "rsb_pcnt.h"		/* performance counters code */
1391 #include "rsb_perf.h"		/* performance info gathering code */
1392 #include "rsb_pr.h"		/* performance reporting */
1393 #include "rsb_util.h"		/* sorting and computational stuff */
1394 #include "rsb_spmv.h"		/* sparse matrix-vector multiplication */
1395 #include "rsb_swt.h"		/* switching format functions */
1396 #include "rsb_lock.h"		/* */
1397 #include "rsb_partition.h"	/* custom partitioning stuff (OBSOLETE) */
1398 #include "rsb_krnl.h"		/* kernels rsb_krnlers */
1399 #include "rsb_krnl_vb.h"	/* vb specific functions */
1400 /* #include "libspblas_tests.h" */	/*  */
1401 #include "rsb_test_accuracy.h"	/* accuracy testing functions */
1402 #include "rsb_krnl_bcss.h"	/* bcss specific functions */
1403 #include "rsb_krnl_bcoo_spmv_u.h"	/* bcoo specific functions */
1404 #include "rsb_bench.h"		/* performance info gathering code (OBSOLETE) */
1405 #include "rsb_spgemm.h"		/* sparse matrices multiplication */
1406 #include "rsb_spgemm_csr.h"	/* sparse matrices multiplication */
1407 #include "rsb_spsum_misc.h"	/* sum of Sparse Matrices */
1408 #include "rsb_spsum.h"		/* Sum of Sparse Matrices */
1409 #include "rsb_spsv.h"		/* */
1410 #include "rsb_lbl.h"		/* OBSOLETE */
1411 /* #include "rsb_experiments.h" */	/* experiments (obsolete) */
1412 #include "rsb_coo2rec.h"		/* */
1413 #include "rsb_rec2coo.h"		/* */
1414 #include "rsb_rec2csr.h"		/* */
1415 #include "rsb_csr2coo.h"		/* */
1416 #include "rsb_cpmv.h"		/* */
1417 #include "rsb_tune.h"		/* */
1418 
1419 #ifdef __cplusplus
1420 }
1421 #endif  /* __cplusplus */
1422 
1423 #endif /* RSB_COMMON_H_INCLUDED */
1424 /* @endcond */
1425