1 /*
2
3 Copyright (C) 2008-2021 Michele Martone
4
5 This file is part of librsb.
6
7 librsb is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 librsb is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with librsb; see the file COPYING.
19 If not, see <http://www.gnu.org/licenses/>.
20
21 */
22 /* @cond INNERDOC */
23 /**
24 * @author Michele Martone
25 * @file
26 * @brief Low level routines and tools for our sparse matrix formats implementations.
27 */
28 #ifndef RSB_COMMON_H_INCLUDED
29 #define RSB_COMMON_H_INCLUDED
30
31 #ifdef __cplusplus
32 extern "C" {
33 #endif /* __cplusplus */
34 #ifdef __cplusplus
35 #define restrict /* for now, the restrict keyword is not allowed in C++ */
36 #endif /* __cplusplus */
37 /**
38 *
39 * VBR internals the user shouldn't use, no way.
40 * This file contents are not meant to be used as an API (Application Programming Interface).
41 *
42 * Manipulate this file at your own risk.
43 *
44 */
45 #include <stdlib.h> /* bsearch, calloc, malloc */
46 #include <stdio.h> /* printf */
47
48 #ifdef HAVE_CONFIG_H /* hopefully, the only non-RSB_ prefixed symbol of ours */
49 #define RSB_HAVE_CONFIG_H HAVE_CONFIG_H
50 #endif /* HAVE_CONFIG_H */
51
52 #ifdef RSB_HAVE_CONFIG_H /* autotools makefiles define this */
53 #include "rsb-config.h" /* this should be the first include */
54 #endif /* RSB_HAVE_CONFIG_H */
55 #if RSB_WANT_OMP_RECURSIVE_KERNELS
56 #include <omp.h>
57 #endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
58 #ifdef RSB_HAVE_UNISTD_H
59 #include <unistd.h> /* getopt, gethostname (some system don't have it here!) */
60 #endif /* RSB_HAVE_UNISTD_H */
61 #ifdef RSB_HAVE_GETOPT_H
62 /* RSB_HAVE_GETOPT_LONG */
63 #include <getopt.h> /* getopt_long is not always available (e.g.: on AIX, or any non GNU system) */
64 typedef struct option rsb_option;
65 #else /* RSB_HAVE_GETOPT_H */
66 /*#undef required_argument*/
67 /*#undef no_argument*/
68 /*#undef optional_argument*/
69 #define required_argument 0 /* ONLY A STUB */
70 #define no_argument 1 /* ONLY A STUB */
71 #define optional_argument 2 /* ONLY A STUB */
72 extern char *optarg;
73 extern int optind, opterr, optopt;
74
75 struct rsb_option_struct {
76 const char *name;
77 int has_arg;
78 int *flag;
79 int val;
80 };
81 /*!
82 * \ingroup gr_internals
83 * \brief An internal, helper structure.
84 */
85 typedef struct rsb_option_struct rsb_option;
86 #endif /* RSB_HAVE_GETOPT_H */
87 #include <ctype.h> /*isdigit*/
88
89 /*
90 * Comment / uncomment the following to your likes
91 */
92 #define RSB_WITH_MM
93 /*#undef RSB_WITH_MM*/
94
95 #ifdef RSB_WITH_MM
96 #include "rsb_mmio.h"
97 #endif /* RSB_WITH_MM */
98
99
100 /* the NDEBUG and DEBUG symbols will affect lot of checking code */
101 /* these flags are NOT supported : they should be debugged :) */
102 /*#define NDEBUG 1*/
103 /*#undef NDEBUG*/
104
105 /*
106 #ifdef NDEBUG
107 #define DEBUG 1
108 #endif
109 */
110
111 /*!
112 \internal
113 For all the situations where 'int' would be used.
114 */
115 typedef int rsb_int;
116
117 /* some ill situations could arise the need for this */
118 #if 0
119 #ifndef NULL
120 #define NULL ((VOID *)(0))
121 #endif
122 #endif
123 #define RSB_NUL '\0'
124
125 /* #define RSB_INLINE inline */ /* experimental */
126 #define RSB_INLINE /* experimental */
127
128 /* if defined, less partitioning arrays allocations will occur for plain CSR and CSC matrices (EXPERIMENTAL) */
129 #define RSB_WANT_EXPERIMENTAL_IN_PLACE_RECURSIVE 0 /** EXPERIMENTAL */
130 #define RSB_WANT_EXPERIMENTAL_NO_EXTRA_CSR_ALLOCATIONS 1 /** EXPERIMENTAL: should avoid extra (non BCSR-related) arrays*/
131 #define RSB_WANT_EXPERIMENTAL_FILLIN_ESTIMATOR 2 /* */
132
133 #define RSB_WANT_INDEX_BASED_SORT 1 /**< if defined, will enable a faster index+permutation based sorting (EXPERIMENTAL) */
134 #define RSB_WANT_Z_SORT 1 /**< if 1, enable Z sorting at all */
135 #define RSB_FORTRAN_VERBOSE_CALLS 0 /**< */
136 #define RSB_EXPERIMENTAL_WANT_PURE_BCSS 0 /**< EXPERIMENTAL : for BCSR, will prevent from allocating VBR arrays */
137 #define RSB_EXPERIMENTAL_USE_PURE_BCSS 1 /**< EXPERIMENTAL : for BCSR, will prevent from using VBR arrays */
138 #define RSB_EXPERIMENTAL_USE_PURE_BCSS_FOR_CONSTRUCTOR 1 /**< EXPERIMENTAL : for BCSR, will prevent from using VBR arrays */
139
140 #define RSB_WANT_OMP_RECURSIVE_SPSV 1 /**< EXPERIMENTAL */
141 #define RSB_WANT_OMP_RECURSIVE_SPMV 1 /**< EXPERIMENTAL */
142 #define RSB_EXPERIMENTAL_FORCE_ROW_SUBDIVISIONS_UNTIL_CORES_NUMBER 0 /**< EXPERIMENTAL */
143 #define RSB_EXPERIMENTAL_ONE_SINGLE_LOCK_FOR_ALL_SUBMATRICES (1&&RSB_WANT_OMP_RECURSIVE_KERNELS) /**< EXPERIMENTAL: incompatible with the prev. */
144
145 #define RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT 1
146 #define RSB_ALLOW_EMPTY_MATRICES 1
147
148 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS (1&&RSB_WANT_RSB_AS_ONLY_ALLOWED_FORMAT ) /**< EXPERIMENTAL */
149
150 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS2 (1&&RSB_WANT_OMP_RECURSIVE_KERNELS) /**< EXPERIMENTAL */
151 /* mutually exclusive options for : RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_BLOCKS2 */
152 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_WITHOUT_BLOCKING 1 /**< EXPERIMENTAL */
153 #define RSB_EXPERIMENTAL_ALTERNATING_SUBMATRIX_HEURISTIC 0 /**< EXPERIMENTAL */
154 #define RSB_EXPERIMENTAL_WORK_BALANCING_HEURISTIC 1 /**< EXPERIMENTAL */
155
156 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_LINKED_LIST 0 /**< EXPERIMENTAL */
157 #define RSB_EXPERIMENTAL_SHOULD_TRAVERSE_RECURSIVE_MATRICES_AS_TREE (0) /**< The standard mechanism, 2-partitioned. */
158 #define RSB_SHOULD_FAIL_INIT_IF_MEMHIER_DETECTION_FAILS 1 /**< */
159
160 #define RSB_EXPERIMENTAL_NO_SUBDIVIDE_ON_MIN_NNZ_PER_ROW_OR_COLUMN 1 /**< Block matrix subdivision under a threshold */
161 #define RSB_EXPERIMENTAL_ROWS_SUBDIVIDE_TO_CORES_NUM 1 /**< Subdivide to obtain no less matrices than cores */
162 #define RSB_CONST_MIN_NNZ_PER_ROW_OR_COLUMN_PER_SUBMATRIX 3 /**< Lower threshold for nnz/m or nnz/k, for subdivision, should be determined heuristically */
163
164 #define RSB_EXPERIMENTAL_WANT_BEST_TIMES 1 /**< EXPERIMENTAL */
165 #define RSB_EXPERIMENTAL_UNLIMITED_RECURSION 0 /**< EXPERIMENTAL */
166 #define RSB_EXPERIMENTAL_MORTON_ORDERED_RECURSION 0 /**< EXPERIMENTAL, UNFINISHED, just for demo purposes */
167
168 #define RSB_EXPERIMENTAL_QUAD_DIVISION_POLICY_NAIVE 0 /**< EXPERIMENTAL */
169 #define RSB_EXPERIMENTAL_QUAD_DIVISION_POLICY_UNSYMMETRIC 1 /**< EXPERIMENTAL, UNIMPLEMENTED */
170 #define RSB_EXPERIMENTAL_QUAD_DIVISION_POLICY RSB_EXPERIMENTAL_QUAD_DIVISION_POLICY_NAIVE
171
172 #define RSB_CONST_IMPOSSIBLY_BIG_TIME 1000000000 /**< in seconds, used when computing 'minimum' running times. any measured time interval should be less than RSB_CONST_IMPOSSIBLY_BIG_TIME. */
173 #define RSB_MIN_ABOVE_INF(X,Y,MIN) RSB_MAX(RSB_MIN(X,Y),MIN)
174 #define RSB_CONST_TIME_FOR_MICRO_BENCHMARK 0.1 /**< */
175 #define RSB_CONST_MIN_TIMES_FOR_MICRO_BENCHMARK 10 /**< */
176
177 #define RSB_EXPERIMENTAL_EXPAND_SYMMETRIC_MATRICES_BY_DEFAULT 0 /**< EXPERIMENTAL */
178 #define RSB_WANT_BOUNDED_BOXES 1
179 #define RSB_WANT_BOUNDED_BOXES_SPMV (RSB_WANT_BOUNDED_BOXES && 1)
180 #define RSB_WANT_BOUNDED_BOXES_SPSV (RSB_WANT_BOUNDED_BOXES && 1)
181 #define RSB_WANT_EARLY_PARALLEL_REGION_JUMPOUT_SPMV 1
182 #define RSB_WANT_SM_TO_THREAD_MOD_MAPPING 1
183 #define RSB_WANT_EARLY_PARALLEL_REGION_JUMPOUT_SPSV 1
184
185 #define RSB_WANT_BITMAP 0
186 /** if RSB_WANT_BITMAP, should att to rsb_mtx_t:
187 * auxiliary data structures */
188 /*struct rsb_options_t *options;*/ /* FIXME: deprecated -- will be deleted soon */
189
190 #define RSB_WANT_DEBUG_VERBOSE_INTERFACE_NOTICE (/*RSB_OUT_ERR_VERBOSITY>0 &&*/ RSB_INT_ERR_VERBOSITY>0)
191 #define RSB_WANT_PERFORMANCE_FILE 0
192 #if defined(RSB_HAVE_SIGNAL_H) /* && defined(RSB_HAVE_BITS_SIGACTION_H) */
193 #define RSB_WANT_ACTION 1
194 #else
195 #define RSB_WANT_ACTION 0
196 #endif
197 /* #define RSB_WANT_ACTION RSB_ALLOW_INTERNAL_GETENVS */ /* 1 */
198 #if RSB_WANT_ACTION
199 #define RSB_SHALL_QUIT ( rsb__quit_rsbench != 0 )
200 #define RSB_INTERNALS_RSBENCH_HEAD_DECLS extern int rsb__quit_rsbench;
201 void rsb__sigh(int signal);
202 #define RSB_SIGHR rsb__sigr();
203 #else /* RSB_WANT_ACTION */
204 #define RSB_SHALL_QUIT ( 0 != 0 )
205 #define RSB_INTERNALS_RSBENCH_HEAD_DECLS
206 #define RSB_SIGHR
207 #endif /* RSB_WANT_ACTION */
208
209 #include "rsb_struct.h" /* */
210
211 /* @cond INNERDOC */
212 /*!
213 * \internal
214 * \ingroup gr_internals
215 * \brief An internal, helper structure (OBSOLETE).
216 *
217 * A rsb_options_t structure could keep track of helper information
218 * like :
219 *
220 * - desired nonzero pattern
221 * - pointers to optimal or desired functions for certain operations
222 * - ..
223 * - .. data which is perfectly optional
224 * */
225 struct rsb_options_t{
226 /** An auxiliary bitmap */
227 rsb_bitmap_data_t *bitmap;
228 double a;
229 };
230 /* @endcond */
231
232
233 #if RSB_FORTRAN_VERBOSE_CALLS
234 #define RSB_FORTRAN_VERBOSE_CALL(M) RSB_ERROR(M)
235 #else /* RSB_FORTRAN_VERBOSE_CALLS */
236 #define RSB_FORTRAN_VERBOSE_CALL(M)
237 #endif /* RSB_FORTRAN_VERBOSE_CALLS */
238
239
240 /* DEBUG FLAGS */
241 /*
242 Enable any combination of the following flags to activate debug code.
243 Do this only when debugging/developing, because it will slow down the code a lot,
244 */
245
246 #define RSB_WANT_DEBUG_PARANOID_ASSERTS 0 /**< if 1, will activate a number of assert() calls which won't change the code flow but will check for anomalous error conditions */
247 #if RSB_WANT_DEBUG_PARANOID_ASSERTS
248 #define RSB_DEBUG_ASSERT(e) assert(e)
249 #else /* RSB_WANT_DEBUG_PARANOID_ASSERTS */
250 #define RSB_DEBUG_ASSERT(e)
251 #endif /* RSB_WANT_DEBUG_PARANOID_ASSERTS */
252
253 #define RSB_ASSERT(e) assert(e) /* NOTE: in the future, could be replaced with some {if(..)goto err;} or exit()-like statement */
254
255 /* commented out 20120915, since it was not used anyway most of the time
256 #ifdef DEBUG
257 #define RSB_DEBUG_BITMAP 1
258 #define RSB_DEBUG_BLOCK_STUFF 1
259 #define RSB_DEBUG_SORT_STUFF 1
260 #endif
261 */
262
263 /*#define RSB_QUIET 1*/
264
265 /* FIXME : TODO : join these macros as a single debug flag */
266 #define RSB_MEM_DEBUG 0 /* if 1, will make trigger printouts on allocations and deallocations */
267 #define RSB_QUIET_MEM_ERRORS 0 /* if 1, will not even print out fatal error conditions */
268
269 /* END DEBUG FLAGS */
270
271
272 /** Macros to check basic indices values validity ( FIXME : unfinished, should be much stricter ) */
273 #define RSB_INVALID_COO_INDEX(I) ((I)>RSB_MAX_MATRIX_DIM || (I)<0) /* should fail only if signed */
274 #define RSB_INVALID_NNZ_INDEX(I) ((I)>RSB_MAX_MATRIX_NNZ)
275 #define RSB_INVALID_BLK_INDEX(I) (RSB_INVALID_COO_INDEX(I))
276 #define RSB_INVALID_NNZ_COUNT(I) ((I)<1L || ( RSB_NNZ_ADD_OVERFLOW((I),RSB_INDEX_OF_SAFE_EXTRA) ))
277 #define RSB_INVALID_NNZ_COUNT_FOR_FLAGS(I,F) ((!RSB_DO_FLAG_HAS((F),RSB_FLAG_UNIT_DIAG_IMPLICIT)) && RSB_INVALID_NNZ_COUNT(I))
278
279 #define RSB_DO_FLAG_HAVE_AND(V1,V2,F) RSB_BOOL_AND(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
280 #define RSB_DO_FLAG_HAVE_NAND(V1,V2,F) RSB_BOOL_NAND(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
281 #define RSB_DO_FLAG_HAVE_XOR(V1,V2,F) RSB_BOOL_XOR(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
282 #define RSB_DO_FLAG_HAVE_OR(V1,V2,F) RSB_BOOL_OR(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
283 #define RSB_DO_FLAG_HAVE_NOR(V1,V2,F) RSB_BOOL_NOR(RSB_DO_FLAG_HAS(V1,F),RSB_DO_FLAG_HAS(V2,F))
284 #define RSB_DO_FLAG_SUBST(FLAGSVAR,FLAGS_OLD,FLAGS_NEW) RSB_DO_FLAG_DEL(FLAGSVAR,(FLAGS_OLD)), RSB_DO_FLAG_ADD(FLAGSVAR,(FLAGS_NEW))
285
286 #define RSB_INVALID_COO_COUNT(I) ((I)<1L || ( RSB_COO_ADD_OVERFLOW((I),RSB_INDEX_OF_SAFE_EXTRA) ))
287 #define RSB_IS_VALID_NNZ_COUNT(I) (!RSB_INVALID_NNZ_COUNT(I))
288 #define RSB_IS_VALID_COO_INDEX(I) (!RSB_INVALID_COO_INDEX(I))
289 #define RSB_IS_VALID_COO_DIM(I) (!RSB_INVALID_COO_COUNT(I))
290 #define RSB_IS_VALID_BLK_INDEX(I) (!RSB_INVALID_BLK_INDEX(I))
291 #define RSB_IS_VALID_NNZ_INDEX(I) (!RSB_INVALID_NNZ_INDEX(I))
292 #define RSB_IS_VALID_INCX_VALUE(I) (!RSB_INVALID_NNZ_COUNT(I))
293 #define RSB_ARE_VALID_MATRIX_INIT_PARS(R,C,NNZ,TYPE) ( \
294 RSB_IS_VALID_NNZ_COUNT(NNZ)&& \
295 RSB_IS_VALID_COO_INDEX(R)&& \
296 RSB_IS_VALID_COO_INDEX(C)&& \
297 (!RSB_MATRIX_UNSUPPORTED_TYPE(TYPE)) )
298 /*#define RSB_IS_VALID_NNZ_SUM(NZ1,NZ2) RSB_IS_VALID_NNZ_COUNT(((size_t)(NZ1))+((size_t)(NZ2)))*/
299 #define RSB_IS_VALID_NNZ_SUM(NZ1,NZ2) (((size_t)(RSB_MAX_MATRIX_NNZ))>=(((size_t)(NZ1))+((size_t)(NZ2))))
300 #define RSB_IS_INVALID_TYPE_SIZE(TS) ((TS)<1)
301
302 #define RSB_IS_VALID_TRANS(T) ((T)>=RSB_MIN(RSB_MIN(RSB_TRANSPOSITION_T,RSB_TRANSPOSITION_C),(RSB_TRANSPOSITION_N)) && (T)<=RSB_MAX(RSB_MAX(RSB_TRANSPOSITION_T,RSB_TRANSPOSITION_C),(RSB_TRANSPOSITION_N))) /* */
303 #define RSB_IS_VALID_THREAD_COUNT(C) ((C)> 0 && (C)<=RSB_CONST_MAX_SUPPORTED_CORES) /* */
304 #define RSB_IS_VALID_THREAD_SPEC(C) ((C)>=0 && (C)<=RSB_CONST_MAX_SUPPORTED_CORES) /* */
305
306 /** An initializer value for index variables. */
307 #define RSB_INI ((rsb_coo_idx_t)(-1))
308
309
310 #include <stdlib.h> /* basic types and functions definitions */
311
312 /*
313 * Bitmap stuff macros and functions.
314 * Uses row major order by default.
315 * By defining RSB_BITMAP_ROW_MAJOR_ORDER, row major order will be adopted.
316 *
317 * p.s.: please DO NOT use the following fixed macros outside the nearby macros.
318 * */
319 /*#define RSB_BITMAP_ROW_MAJOR_ORDER 1*/
320 #define RSB_BITS_PER_INT (sizeof(rsb_bitmap_data_t)*RSB_CHAR_BIT)
321 #define RSB_BITS_PER_ROW(cols) ((cols)+(RSB_BITS_PER_INT-1))
322 #define RSB_BYTES_PER_ROW(cols) ((cols+(RSB_CHAR_BIT-1))/RSB_CHAR_BIT)
323 #define RSB_INTS_PER_ROW(cols) ((cols+((RSB_BITS_PER_INT)-1))/(RSB_BITS_PER_INT))
324 #define RSB_INT_IN_ROW(cols) ((cols)/(RSB_BITS_PER_INT))
325
326 /* note that this is (logically) machine independent code */
327 #define RSB_SET_BIT(p,b) (*(rsb_bitmap_data_t*)(p))=(*(rsb_bitmap_data_t*)(p)|(1<<(b)))
328 #define RSB_UNSET_BIT(p,b) (*(rsb_bitmap_data_t*)(p))=(*(rsb_bitmap_data_t*)(p)&~(1<<(b)))
329 #define RSB_GET_BIT(p,b) ((*(rsb_bitmap_data_t*)(p))&(1<<(b)))
330 #define RSB_BITMAP_POINTER(p,rw,r,c) (((rsb_bitmap_data_t*)(p))+(RSB_INTS_PER_ROW(rw)*(r)+RSB_INT_IN_ROW(c)))
331
332 #ifdef RSB_BITMAP_ROW_MAJOR_ORDER
333 /* Note that only a swap in the following three macros is needed to switch the storage format of our bitmap */
334 #define RSB_BITMAP_GET(p,rows,cols,r,c) RSB_GET_BIT((RSB_BITMAP_POINTER((p),(rows),(c),(r))),((r)%(RSB_BITS_PER_INT)))
335 #define RSB_BITMAP_SET(p,rows,cols,r,c) RSB_SET_BIT((RSB_BITMAP_POINTER((p),(rows),(c),(r))),((r)%(RSB_BITS_PER_INT)))
336 #define RSB_BITMAP_UNSET(p,rows,cols,r,c) RSB_UNSET_BIT((RSB_BITMAP_POINTER((p),(rows),(c),(r))),((r)%(RSB_BITS_PER_INT)))
337 #define RSB_BYTES_PER_BITMAP_(ld,d) (sizeof(rsb_bitmap_data_t)*(RSB_INTS_PER_ROW(ld)) * (d))
338 #define RSB_BITMAP_CLEAR(p,rows,cols) RSB_BZERO((p),(RSB_BYTES_PER_BITMAP_(cols,rows)))
339 #else /* RSB_BITMAP_ROW_MAJOR_ORDER */
340 #define RSB_BITMAP_GET(p,rows,cols,r,c) RSB_GET_BIT((RSB_BITMAP_POINTER((p),(cols),(r),(c))),((c)%(RSB_BITS_PER_INT)))
341 #define RSB_BITMAP_SET(p,rows,cols,r,c) RSB_SET_BIT((RSB_BITMAP_POINTER((p),(cols),(r),(c))),((c)%(RSB_BITS_PER_INT)))
342 #define RSB_BITMAP_UNSET(p,rows,cols,r,c) RSB_UNSET_BIT((RSB_BITMAP_POINTER((p),(cols),(r),(c))),((c)%(RSB_BITS_PER_INT)))
343 #define RSB_BYTES_PER_BITMAP_(d,ld) (sizeof(rsb_bitmap_data_t)*(RSB_INTS_PER_ROW(ld)) * (d))
344 #define RSB_BITMAP_CLEAR(p,rows,cols) RSB_BZERO((p),(RSB_BYTES_PER_BITMAP_(rows,cols)))
345 #endif /* RSB_BITMAP_ROW_MAJOR_ORDER */
346 #define RSB_WORDS_PER_BITMAP(ld,d) ((RSB_BYTES_PER_BITMAP_(ld,d)+(sizeof(rsb_bitmap_data_t)-1))/sizeof(rsb_bitmap_data_t))
347
348 #define RSB_BYTES_PER_BITMAP(rows,cols) RSB_BYTES_PER_BITMAP_(rows,cols)
349 #define RSB_BLOCK_UNSET_BIT_FOR_NNZ(IA,JA,k,M) {rsb_coo_idx_t i=(RSB_GET_BLOCK_ROW_FOR_NZ(IA,(M))); rsb_coo_idx_t j=(RSB_GET_BLOCK_COL_FOR_NZ(JA,M)); RSB_BITMAP_UNSET((M)->options->bitmap,(M)->M_b,(M)->K_b,i,j);}
350 #define RSB_BLOCK_SET_BIT_FOR_NNZ(IA,JA,k,M) {rsb_coo_idx_t i=(RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,M)); rsb_coo_idx_t j=(RSB_GET_BLOCK_COL_FOR_NZ(JA+k,M)); RSB_BITMAP_SET((M)->options->bitmap,(M)->M_b,(M)->K_b,i,j);}
351 #define RSB_BLOCK_GET_BIT_FOR_NNZ(IA,JA,k,M) {rsb_coo_idx_t i=(RSB_GET_BLOCK_ROW_FOR_NZ(IA+k,M)); rsb_coo_idx_t j=(RSB_GET_BLOCK_COL_FOR_NZ(JA+k,M)); RSB_BITMAP_GET((M)->options->bitmap,(M)->M_b,(M)->K_b,i,j);}
352
353 /*
354 * Macros for one dimensional bitmaps -- easier to use.
355 * */
356 #ifdef RSB_BITMAP_ROW_MAJOR_ORDER
357 #define RSB_BITVECTOR_GET(p,bits,bit) RSB_BITMAP_GET(p,bits,1,bit,0)
358 #define RSB_BITVECTOR_SET(p,bits,bit) RSB_BITMAP_SET(p,bits,1,bit,0)
359 #define RSB_BITVECTOR_UNSET(p,bits,bit) RSB_BITMAP_UNSET(p,bits,1,bit,0)
360 #define RSB_BITVECTOR_CLEAR(p,bits) RSB_BITMAP_CLEAR(p,bits,1)
361 #define RSB_BYTES_PER_BITVECTOR(bits) RSB_BYTES_PER_BITMAP(bits,1)
362 #define RSB_WORDS_PER_BITVECTOR(bits) RSB_WORDS_PER_BITMAP(bits,1)
363 #else /* RSB_BITMAP_ROW_MAJOR_ORDER */
364 #define RSB_BITVECTOR_GET(p,bits,bit) RSB_BITMAP_GET(p,1,bits,0,bit)
365 #define RSB_BITVECTOR_SET(p,bits,bit) RSB_BITMAP_SET(p,1,bits,0,bit)
366 #define RSB_BITVECTOR_UNSET(p,bits,bit) RSB_BITMAP_UNSET(p,1,bits,0,bit)
367 #define RSB_BITVECTOR_CLEAR(p,bits) RSB_BITMAP_CLEAR(p,1,bits)
368 #define RSB_BYTES_PER_BITVECTOR(bits) RSB_BYTES_PER_BITMAP(1,bits)
369 #define RSB_WORDS_PER_BITVECTOR(bits) RSB_WORDS_PER_BITMAP(1,bits)
370 #endif /* RSB_BITMAP_ROW_MAJOR_ORDER */
371
372 /* given :
373 * the address of the nonzero element column index
374 * a rsb_options_t pointer
375 * will return the block column variable as it should.
376 * note that this macro will work only if o->M_b, o->K_b, o->p_r IA are properly initialized and o->p_r sorted.
377 *
378 * p.s.: note that the use of this could be avoided with a modest memory allocation...
379 * p.s.: in the following, we blindly trust that bsearch won't fail
380 * */
381 #define RSB_GET_BLOCK_COL_FOR_NZ_(columnidxp,cpntr,K_b) (((rsb_coo_idx_t*)bsearch((columnidxp),(cpntr),(K_b),sizeof(rsb_coo_idx_t),(rsb__nnz_coord_compar))-((cpntr))))
382 #define RSB_GET_BLOCK_COL_FOR_NZ(columnidxp,M) (RSB_GET_BLOCK_COL_FOR_NZ_((columnidxp),(M)->cpntr,(M)->K_b))
383
384 #define RSB_GET_BLOCK_ROW_FOR_NZ_(rowidxp,rpntr,M_b) (((rsb_coo_idx_t*)bsearch((rowidxp) ,(rpntr),(M_b),sizeof(rsb_coo_idx_t),(rsb__nnz_coord_compar))-((rpntr))))
385 #define RSB_GET_BLOCK_ROW_FOR_NZ(rowidxp ,M) (RSB_GET_BLOCK_ROW_FOR_NZ_((rowidxp),(M)->rpntr,(M)->M_b))
386
387 #define RSB_GET_BLOCK_MAJ_FOR_NZ_(majidxp,Mpntr,Md_b) (((rsb_coo_idx_t*)bsearch((majidxp) ,(Mpntr),(Md_b),sizeof(rsb_coo_idx_t),(rsb__nnz_coord_compar))-((Mpntr))))
388 #define RSB_GET_BLOCK_MAJ_FOR_NZ(majidxp ,M) (RSB_GET_BLOCK_MAJ_FOR_NZ_((majidxp),(M)->Mpntr,(M)->Mdim))
389
390 #define RSB_GET_BLOCK_MIN_FOR_NZ_(minidxp,mpntr,md_b) (((rsb_coo_idx_t*)bsearch((minidxp) ,(mpntr),(md_b),sizeof(rsb_coo_idx_t),(rsb__nnz_coord_compar))-((mpntr))))
391 #define RSB_GET_BLOCK_MIN_FOR_NZ(minidxp ,M) (RSB_GET_BLOCK_MIN_FOR_NZ_((minidxp),(M)->mpntr,(M)->mdim))
392
393 #define GET_BLOCK_FIRST_COLUMN(column,M) ((M)->cpntr[(column)])
394 #define GET_BLOCK_FIRST_ROW(row,M) ((M)->rpntr[ (row) ])
395
396 #define GET_BLOCK_WIDTH(column,M) (((M)->cpntr[(column)+1])-((M)->cpntr[(column)]))
397 #define GET_BLOCK_HEIGHT(row,M) (((M)->rpntr[ (row) +1])-((M)->rpntr[ (row) ]))
398
399 #define GET_BLOCK_SIZE(row,column,M) ((GET_BLOCK_WIDTH((column),(M)))*(GET_BLOCK_HEIGHT((row),(M))))
400
401 /*
402 * Blanks a whole matrix block.
403 * */
404 #define RSB_BLANK_BLOCK(BP,M,BLOCKROW,BLOCKCOLUMN) \
405 RSB_BZERO( ((rsb_byte_t*)(BP)), \
406 ( (M)->el_size * GET_BLOCK_SIZE((BLOCKROW),(BLOCKCOLUMN),(M)) ) );
407
408 #define RSB_INTRA_BLOCK_ROW(row,blockrow,M) ((row) - (M)->rpntr[(blockrow)])
409 #define RSB_INTRA_BLOCK_COLUMN(column,blockcolumn,M) ((column) - (M)->rpntr[(blockcolumn)])
410 #define RSB_GET_INTRA_BLOCK_OFFSET_ROW_MAJOR(row,column,blockrow,blockcolumn,M) ((( (row) - (M)->rpntr[(blockrow)]) * (GET_BLOCK_WIDTH((blockcolumn),(M))) + ( (column) - (M)->cpntr[(blockcolumn)] )) * (M)->el_size)
411 #define RSB_GET_INTRA_BLOCK_OFFSET_COLUMN_MAJOR(row,column,blockrow,blockcolumn,M) ((( (column) - (M)->cpntr[(blockcolumn)]) * (GET_BLOCK_HEIGHT((blockrow,(M))) + ( (row) - (M)->rpntr[(blockrow)] )) * (M)->el_size)
412
413 #define RSB_GET_INTRA_BLOCK_ROW_STRIDE(blockrow,blockcolumn,M) (GET_BLOCK_WIDTH((blockcolumn),(M)))
414 #define RSB_GET_INTRA_BLOCK_OFFSET(row,column,blockrow,blockcolumn,M) \
415 (RSB_GET_INTRA_BLOCK_OFFSET_ROW_MAJOR(row,column,blockrow,blockcolumn,M))
416 #define RSB_GET_INTRA_BLOCK_OFFSET_TRANSPOSED(row,column,blockrow,blockcolumn,M) \
417 (RSB_GET_INTRA_BLOCK_OFFSET_COLUMN_MAJOR(row,column,blockrow,blockcolumn,M))
418
419 /*!
420 * Macros for diagonal-related comparisons.
421 *
422 * \code
423 * (ROW,COL) (ROW,COL+COLS)
424 * +--------------+
425 * | |
426 * | |
427 * | |
428 * ... ...
429 * | |
430 * | |
431 * +--------------+
432 * (ROW+ROWS,COL) (ROW+ROWS,COL+COLS)
433 *
434 *
435 * under diagonal over diagonal
436 * intersects first at row COL intersects first at row ROW
437 * intersects last at row ROW+ROWS intersects last at row COL+COLS
438 *
439 * +---------------+ +-\-------------+
440 * \| | | \ |
441 * \ | | \ |
442 * |\ | | \ |
443 * ... ... ... ...
444 * | | | \|
445 * | | | \
446 * +-----\---------+ +---------------+
447 * \endcode
448 * */
449
450 #define RSB_POINT_QUASI_UNDER_DIAGONAL(ROW,COL) ((ROW)>=(COL))
451 #define RSB_POINT_QUASI_OVER_DIAGONAL(ROW,COL) ((ROW)<=(COL))
452 #define RSB_POINT_UNDER_DIAGONAL(ROW,COL) ((ROW)> (COL))
453 #define RSB_POINT_OVER_DIAGONAL(ROW,COL) ((ROW)< (COL))
454
455 #define RSB_POINT_UNDER_SUPRA_DIAGONAL(ROW,COL,OFFSET) (RSB_POINT_UNDER_DIAGONAL(((ROW)+(OFFSET)),(COL)))
456 #define RSB_POINT_UNDER_SUB_DIAGONAL(ROW,COL,OFFSET) (RSB_POINT_UNDER_DIAGONAL(((ROW)),((COL)+(OFFSET))))
457 #define RSB_POINT_OVER_SUPRA_DIAGONAL(ROW,COL,OFFSET) (RSB_POINT_OVER_DIAGONAL(((ROW)+(OFFSET)),(COL)))
458 #define RSB_POINT_OVER_SUB_DIAGONAL(ROW,COL,OFFSET) (RSB_POINT_OVER_DIAGONAL((ROW),((COL)+(OFFSET))))
459
460 /* assumes COLS>=1, ROWS>=1 */
461 #define RSB_BLOCK_CROSSED_BY_DIAGONAL(ROW,COL,ROWS,COLS) \
462 ( \
463 RSB_POINT_QUASI_UNDER_DIAGONAL((ROW)+(ROWS-1),(COL)) && \
464 RSB_POINT_QUASI_OVER_DIAGONAL((ROW),((COL)+(COLS-1))) )
465 #define RSB_BLOCK_CROSSED_BY_SUPRA_DIAGONAL(ROW,COL,ROWS,COLS,OFFSET) \
466 RSB_BLOCK_CROSSED_BY_DIAGONAL((ROW)+(OFFSET),(COL),(ROWS),(COLS))
467 #define RSB_BLOCK_CROSSED_BY_SUB_DIAGONAL(ROW,COL,ROWS,COLS,OFFSET) \
468 RSB_BLOCK_CROSSED_BY_SUPRA_DIAGONAL(COL,ROW,COLS,ROWS,OFFSET)
469 #define RSB_BLOCK_CROSSED_BY_SUPRA_OR_SUB_DIAGONAL(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET) \
470 ( \
471 (LOFFSET)>(UOFFSET)? \
472 (RSB_BLOCK_CROSSED_BY_SUB_DIAGONAL(ROW,COL,ROWS,COLS,LOFFSET)): \
473 (RSB_BLOCK_CROSSED_BY_SUPRA_DIAGONAL(ROW,COL,ROWS,COLS,UOFFSET)) )
474
475 /*
476 * The offset in the block to the first element which is on the diagonal
477 * The stride will be ROWS+1 or COLS+1, depending on the internal storage.
478 * We here assume C storage.
479 * */
480 #define RSB_BLOCK_DIAGONAL_OFFSET(ROW,COL,ROWS,COLS) \
481 ((RSB_POINT_UNDER_DIAGONAL((ROW),(COL))) ? ((ROW)-(COL)) : (((COL)-(ROW))*(COLS)) )
482 /* if the block is internally stored in Fortran, then : */
483 #define RSB_BLOCK_DIAGONAL_OFFSET_FORTRAN_STORED(ROW,COL,ROWS,COLS) \
484 ((RSB_POINT_OVER_DIAGONAL((ROW),(COL))) ? (((COL)-(ROW))*(ROWS)) : ((ROW)-(COL)) )
485
486 /* The following is ordering-neutral.
487
488 +------------------->
489 | \ +-----+ ^^
490 | \ | | ||
491 | \ | |v RSB_BLOCK_DIAGONAL_OFFSET_FIRST_ROW
492 | +-\---+ v RSB_BLOCK_DIAGONAL_OFFSET_LAST_ROW
493 |<---> \ RSB_BLOCK_DIAGONAL_OFFSET
494 | \
495 v
496
497 +------------------->
498 | \ ^^
499 | \ ||
500 | +--\--+ v| RSB_BLOCK_SUPRA_DIAGONAL_OFFSET_FIRST_ROW
501 | | \| v RSB_BLOCK_SUPRA_DIAGONAL_OFFSET_LAST_ROW
502 | | |\
503 | +-----+ \
504 |
505 v
506
507 +------------------->
508 | ^^
509 | ||
510 | \ +-----+ ||
511 | \ | | ||
512 | \ | |v RSB_BLOCK_SUB_DIAGONAL_OFFSET_FIRST_ROW
513 | +-\---+ v RSB_BLOCK_SUB_DIAGONAL_OFFSET_LAST_ROW
514 | \
515 | \
516 v
517 */
518 #define RSB_BLOCK_DIAGONAL_OFFSET_FIRST_ROW(ROW,COL,ROWS,COLS) \
519 ((RSB_POINT_UNDER_DIAGONAL((ROW),(COL))) ? (ROW) : (COL) )
520
521 #define RSB_BLOCK_DIAGONAL_OFFSET_LAST_ROW(ROW,COL,ROWS,COLS) \
522 ((RSB_POINT_UNDER_DIAGONAL((((ROW)+(ROWS))-1),(((COL)+(COLS))-1))) ? (((COL)+(COLS))-1):(((ROW)+(ROWS))-1) )
523
524 #define RSB_BLOCK_SUPRA_DIAGONAL_OFFSET_FIRST_ROW(ROW,COL,ROWS,COLS,OFFSET) \
525 (RSB_BLOCK_DIAGONAL_OFFSET_FIRST_ROW(((ROW)+(OFFSET)),(COL),ROWS,COLS))
526
527 /*#define RSB_BLOCK_SUPRA_DIAGONAL_OFFSET_LAST_ROW(ROW,COL,ROWS,COLS,OFFSET) */
528 /* RSB_BLOCK_DIAGONAL_OFFSET_LAST_ROW((ROW)+(OFFSET),COL,ROWS,COLS)*/
529
530 #define RSB_BLOCK_SUB_DIAGONAL_OFFSET_FIRST_ROW(ROW,COL,ROWS,COLS,OFFSET) \
531 (RSB_BLOCK_DIAGONAL_OFFSET_FIRST_ROW(ROW,(COL)+(OFFSET),ROWS,COLS))
532
533 #define RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_FIRST_ROW(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET) \
534 ((RSB_POINT_UNDER_DIAGONAL((ROW)+(UOFFSET),(COL)+(LOFFSET))) ? (ROW) : (COL)+(LOFFSET)-(UOFFSET) )
535
536 #define RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_FIRST_COL(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET) \
537 RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_FIRST_ROW(COL,ROW,COLS,ROWS,UOFFSET,LOFFSET)
538
539 #define RSB_BLOCK_SUB_DIAGONAL_OFFSET_LAST_ROW(ROW,COL,ROWS,COLS,OFFSET) \
540 RSB_BLOCK_DIAGONAL_OFFSET_LAST_ROW((ROW),(COL)+(OFFSET),ROWS,COLS)
541
542 #define RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_LAST_ROW(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET) \
543 ((RSB_POINT_UNDER_DIAGONAL((ROW)+((ROWS)-1)+(UOFFSET),(COL)+((COLS)-1)+(LOFFSET))) ? (COL)+((COLS)-1)+(LOFFSET)-(UOFFSET) : (ROW)+(ROWS)-1)
544
545 #define RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_LAST_COL(ROW,COL,ROWS,COLS,LOFFSET,UOFFSET) \
546 RSB_BLOCK_SUB_OR_SUPRA_DIAGONAL_OFFSET_LAST_ROW(COL,ROW,COLS,ROWS,UOFFSET,LOFFSET)
547
548
549 /* pure VBR, with no trailing structs : */
550
551 /* row major order (default) : */
552
553 #define RSB_GET_NEXT_BLOCK_POINTER(BP,M,ROWVAR,COLVAR,BLOCKROWSVAR,BLOCKCOLSVAR,BLOCKROWVAR,BLOCKCOLUMNVAR) \
554 /* \
555 * *input* \
556 * M should be a valid rsb_mtx_t structure pointer \
557 * *output* \
558 * ROWVAR will be set to the base row of this block \
559 * COLVAR will be set to the base column of this block \
560 * BLOCKROWSVAR will be set to the rows count of this block \
561 * BLOCKCOLSVAR will be set to the column count of this block \
562 * BP will be set to the current block pointer \
563 * */ \
564 ++_k; \
565 if(_k>=(M)->bpntr[*_pi+1]) \
566 { \
567 ++*_pi; /* new blocks row */ \
568 while( (M)->bpntr[*_pi] == (M)->bpntr[*_pi+1] ) /* skipping empty rows */ \
569 ++*_pi; \
570 } \
571 *_pj=(M)->bindx[_k]; /* the current block column index */ \
572 _lastk=_k; \
573 (BLOCKROWVAR)=_i; \
574 (BLOCKCOLUMNVAR)=_j; \
575 (ROWVAR)=(M)->rpntr[_i]; /* _i is the current block row index */ \
576 (COLVAR)=(M)->cpntr[_j]; /* the current block column index */ \
577 /*(BLOCKROWSVAR)=(M)->rpntr[_i+1]-(M)->rpntr[_i];*/ /* the current block rows count */ \
578 /*(BLOCKCOLSVAR)=(M)->cpntr[_j+1]-(M)->cpntr[_j];*/ /* the current block columns count */ \
579 /*(BP)=(rsb_byte_t*)((M)->VA ) + (M)->el_size * (M)->indptr[_k] ; */ \
580 (BLOCKROWSVAR)=GET_BLOCK_HEIGHT(_i,(M)); /* the current block rows count */ \
581 (BLOCKCOLSVAR)=GET_BLOCK_WIDTH( _j,(M)); /* the current block rows count */ \
582 (BP)=(rsb_byte_t*)(RSB_BLOCK_ADDRESS((M),_k)); \
583 ;
584
585 /* row major order : */
586 #define RSB_GET_FIRST_BLOCK_POINTER(BP,M,ROWVAR,COLVAR,BLOCKROWSVAR,BLOCKCOLSVAR,BLOCKROWVAR,BLOCKCOLUMNVAR) \
587 rsb_blk_idx_t _i=0,_j=0; \
588 rsb_blk_idx_t *_pi=NULL,*_pj=NULL; \
589 rsb_nnz_idx_t _k=0,_lastk=0; \
590 if((M)->flags&RSB_FLAG_WANT_COLUMN_MAJOR_ORDER){_pi=&_j;_pj=&_i;}else{_pi=&_i;_pj=&_j;} \
591 while( (M)->bpntr[*_pi] == (M)->bpntr[*_pi+1] ) \
592 ++*_pi; \
593 _k=(M)->bpntr[*_pi]; /* _k is the first block index for the current row of blocks */ \
594 *_pj=(M)->bindx[_k]; /* the current block column index */ \
595 (BLOCKROWVAR)=_i; \
596 (BLOCKCOLUMNVAR)=_j; \
597 (ROWVAR)=(M)->rpntr[_i]; /* _i is the current block row index */ \
598 (COLVAR)=(M)->cpntr[_j]; /* the current block column index */ \
599 (BLOCKROWSVAR)=GET_BLOCK_HEIGHT(_i,(M)); /* the current block rows count */ \
600 (BLOCKCOLSVAR)=GET_BLOCK_WIDTH( _j,(M)); /* the current block rows count */ \
601 (BP)=(rsb_byte_t*)(RSB_BLOCK_ADDRESS((M),_k));
602 #define RSB_GOT_LAST_BLOCK_POINTER(M) ( _lastk >= (M)->block_count )
603
604
605 #define RSB_POINT_IN_BOX(R0,C0,RH,CW,R,C) ( ((R)>=(R0)) && (R)<((R0)+(RH)) && ((C)>=(C0)) && (C)<((C0)+(CW)) )
606 #define RSB_MATRIX_CONTAINS(M,R,C) ( RSB_POINT_IN_BOX((M)->roff,(M)->coff,(M)->nr,(M)->nc,R,C) )
607 #define RSB_SUBMATRIX_CONTAINS_ROW(M,R) ( RSB_POINT_IN_BOX((M)->roff,0,(M)->nr,0,R,1) )
608 #define RSB_SUBMATRIX_INTERSECTS_COLS(M,C0,C1) \
609 ( ( (M)->coff <= (C1) ) && ( (M)->coff+(M)->nc > (C0) ) )
610 #define RSB_SUBMATRIX_INTERSECTS_ROWS(M,R0,R1) \
611 ( ( (M)->roff <= (R1) ) && ( (M)->roff+(M)->nr > (R0) ) )
612
613 #define RSB_SUBMATRIX_INTERSECTS_BOX(M,R0,R1,C0,C1) \
614 (RSB_SUBMATRIX_INTERSECTS_ROWS(M,R0,R1)&&RSB_SUBMATRIX_INTERSECTS_COLS(M,C0,C1))
615
616 #define RSB_FIND_SUBMATRIX_CONTAINING(M,R,C) ( \
617 ((M)->sm[0]&&RSB_MATRIX_CONTAINS((M)->sm[0],R,C)?(M)->sm[0]: \
618 ((M)->sm[1]&&RSB_MATRIX_CONTAINS((M)->sm[1],R,C)?(M)->sm[1]: \
619 ((M)->sm[2]&&RSB_MATRIX_CONTAINS((M)->sm[2],R,C)?(M)->sm[2]: \
620 ((M)->sm[3]&&RSB_MATRIX_CONTAINS((M)->sm[3],R,C)?(M)->sm[3]:NULL )))))
621
622 #define RSB_SUBMATRIX_INDEX(M,I,J) (M->sm[(I)*2+(J)])
623 /*
624 * this should be fixed. we would prefer to use intrinsics here. TODO
625 * */
626 #define RSB_FABS(x) ((x)<(0)?(-x):(x))
627
628 /*!
629 * Misc macros.
630 */
631 #define RSB_ASSIGN_IF_ZERO(VAR,VAL) if( (VAR) == 0) (VAR) = (VAL);
632 #define RSB_SWAP(TYPE,X,Y) {TYPE __tmp=(X);(X)=(Y);(Y)=(__tmp);}
633
634 #define RSB_SUBMATRIX_FOREACH_(matrix,submatrix,smi,smj,smk) \
635 /*int smk;*/ \
636 for(smk=0;smk<4;++smk) \
637 if( (smi=smk/2) >=0 && (smj=smk%2) >= 0 && (submatrix=(matrix)->sm[smi*2+(smj)] ) ) \
638 /* NOTE : handle with care (the 'submatrix' pointer could be NULL) */ \
639
640 #define RSB_SUBMATRIX_FOREACH_REVERSE(matrix,submatrix,smi,smj) \
641 /*int smi,smj;*/ \
642 for(smi=1;smi+1>0;--smi)/* fisrt smi, then smj, or will break spmv_uxux, ... */ \
643 for(smj=1,submatrix=matrix->sm[smi*2+smj]; \
644 smj+1>0; \
645 --smj,submatrix=(smi<2 && smj<2)?matrix->sm[smi*2+(smj)]:NULL) \
646 /* NOTE : handle with care (the 'submatrix' pointer could be NULL) */ \
647
648 #define RSB_SUBMATRIX_FOREACH(MTXAP,submatrix,smi,smj) \
649 /*int smi,smj;*/ \
650 for(smi=0;smi<2;++smi)/* fisrt smi, then smj, or will break spmv_uxux, ... */ \
651 for(smj=0,submatrix=MTXAP->sm[smi*2+smj]; \
652 smj<2; \
653 ++smj,submatrix=(smi<2 && smj<2)?MTXAP->sm[smi*2+(smj)]:NULL) \
654 /* NOTE : handle with care (the 'submatrix' pointer could be NULL) */ \
655
656 /* The following is incorrect, as it accesses one further pointer. */
657 /*
658 #define RSB_SUBMATRIX_FOREACH_LEAF(MTXAP,submatrix,smi) \
659 for( (smi)=0,submatrix=(MTXAP)->all_leaf_matrices[(smi)].mtxlp; \
660 (smi)<(MTXAP)->all_leaf_matrices_n; \
661 ++(smi),submatrix=(MTXAP)->all_leaf_matrices[smi].mtxlp) \
662 */
663
664 /* The following is correct, even if less elegant because of the bad style of assignment. */
665 #define RSB_SUBMATRIX_FOREACH_LEAF(MTXAP,submatrix,smi) \
666 for( (smi)=0; \
667 ((smi)<(MTXAP)->all_leaf_matrices_n) && ( submatrix=(MTXAP)->all_leaf_matrices[(smi)].mtxlp ); \
668 ++(smi))
669
670 #define RSB_SUBMATRIX_FOREACH_LEAF_PERMUTED(MTXAP,submatrix,smi,PV) \
671 for( (smi)=0; \
672 ((smi)<(MTXAP)->all_leaf_matrices_n) && ( submatrix=(MTXAP)->all_leaf_matrices[PV[(smi)]].mtxlp ); \
673 ++(smi))
674
675 #define RSB_SUBMATRIX_IS_ON_DIAG(matrix) ((matrix)->roff==(matrix)->coff)
676 #define RSB_SUBMATRIX_IS_LOWDIAG(matrix) ((matrix)->roff>(matrix)->coff)
677 #define RSB_SUBMATRIX_IS_UPPDIAG(matrix) ((matrix)->roff<(matrix)->coff)
678
679 #define RSB_SUBMATRIX_FOREACH_DIAG_LEAF(matrix,submatrix,smi) \
680 RSB_SUBMATRIX_FOREACH_LEAF(matrix,submatrix,smi) \
681 if(RSB_SUBMATRIX_IS_ON_DIAG(submatrix))
682
683 #define RSB_SUBMATRIX_FOREACH_LOWDIAG_LEAF(matrix,submatrix,smi) \
684 RSB_SUBMATRIX_FOREACH_LEAF(matrix,submatrix,smi) \
685 if(RSB_SUBMATRIX_IS_LOWDIAG(submatrix))
686
687 #define RSB_SUBMATRIX_FOREACH_UPPDIAG_LEAF(matrix,submatrix,smi) \
688 RSB_SUBMATRIX_FOREACH_LEAF(matrix,submatrix,smi) \
689 if(RSB_SUBMATRIX_IS_UPPDIAG(submatrix))
690
691 #define RSB_SUBMATRIX_COLS_INTERSECTION_FIRST(matrix,C) \
692 RSB_MAX(((matrix)->coff),(C))
693
694 #define RSB_SUBMATRIX_COLS_INTERSECTION_LAST(matrix,C) \
695 RSB_MIN(((matrix)->coff+(matrix->nc-1)),(C)) /* FIXME: requires matrix->nc > 0 */
696
697 #define RSB_SUBMATRIX_ROWS_INTERSECTION_FIRST(matrix,R) \
698 RSB_MAX(((matrix)->roff),(R))
699
700 #define RSB_SUBMATRIX_ROWS_INTERSECTION_LAST(matrix,R) \
701 RSB_MIN(((matrix)->roff+(matrix->nr-1)),(R)) /* FIXME: requires matrix->nr > 0 */
702
703 #define RSB_BCSS_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bri,bci,blockindex,baserow,basecolumn,BR,BC) \
704 RSB_DEBUG_ASSERT((matrix)->VA); \
705 RSB_DEBUG_ASSERT((matrix)->el_size>0); \
706 RSB_DEBUG_ASSERT((matrix)->br>0 && (matrix)->bc>0); \
707 blockpointer=(matrix)->VA; \
708 for( bri=0, \
709 baserow=(bri)*(BR); \
710 bri<(matrix)->Mdim; \
711 ++bri, \
712 baserow=(bri)*(BR) \
713 ) \
714 for( bi=(matrix)->bpntr[bri], \
715 bci=(matrix)->bindx[bi], \
716 basecolumn=(bci)*(BC); \
717 bi<(matrix)->bpntr[(bri)+1]; \
718 ++bi, \
719 blockpointer=((rsb_byte_t*)blockpointer)+(matrix)->el_size*(BR)*(BC), \
720 bci=(matrix)->bindx[bi], \
721 baserow=(bri)*(BR), \
722 basecolumn=(bci)*(BC) \
723 )
724
725 #define RSB_BCSR_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bri,bci,blockindex,baserow,basecolumn) \
726 RSB_BCSS_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bri,bci,blockindex,baserow,basecolumn,matrix->br,matrix->bc)
727
728 #define RSB_BCSC_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bri,bci,blockindex,baserow,basecolumn) \
729 RSB_BCSS_MATRIX_FOREACH_BLOCK(matrix,blockpointer,bci,bri,blockindex,basecolumn,baserow,matrix->bc,matrix->br)
730
731 #define RSB_CONST_ENOUGH_BYTES_FOR_ANY_TYPE 32 /** should adapt this in case of need */
732 #define RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE (RSB_CONST_ENOUGH_BYTES_FOR_ANY_TYPE/sizeof(rsb_aligned_t)) /** should adapt this in case of need */
733
734
735 #define RSB_INTERNAL_FLAG_CSR_SORTING_MASK (RSB_FLAG_QUAD_PARTITIONING | RSB_FLAG_OBSOLETE_BLOCK_ASYMMETRIC_Z_SORTING)
736
737 #define RSB_DO_FLAGS_EXTRACT_STORAGE(F) ( \
738 /*((F) & RSB_FLAG_WANT_LINKED_STORAGE) */ 0 | \
739 ((F) & RSB_FLAG_WANT_COO_STORAGE) | \
740 ((F) & RSB_FLAG_WANT_FIXED_BLOCKING_VBR) | \
741 ((F) & RSB_FLAG_WANT_BCSS_STORAGE) | \
742 RSB_FLAG_NOFLAGS )
743
744 #define RSB__FLAG_HAS_UNSPECIFIED_TRIANGLE(FLAGS) (RSB_DO_FLAG_HAS((FLAGS),RSB_FLAG_TRIANGULAR) && !RSB_DO_FLAG_HAS_INTERSECTION((FLAGS),RSB_FLAG_LOWER|RSB_FLAG_UPPER))
745
746 #if 1
747 #define rsb_do_spmv(TRANSA,ALPHAP,MTXAP,XP,INCX,BETAP,YP,INCY) \
748 rsb__do_spmv_general(TRANSA,ALPHAP,MTXAP,XP,INCX,BETAP,YP,INCY,(RSB_OP_FLAG_DEFAULT) RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS)
749 #else
rsb_do_spmv(rsb_trans_t transA,const void * alphap,const struct rsb_mtx_t * mtxAp,const void * Xp,rsb_coo_idx_t incX,const void * betap,void * Yp,rsb_coo_idx_t incY)750 rsb_err_t rsb_do_spmv(rsb_trans_t transA, const void *alphap, const struct rsb_mtx_t * mtxAp, const void * Xp, rsb_coo_idx_t incX, const void * betap, void * Yp, rsb_coo_idx_t incY)
751 {
752 rsb_err_t errval = rsb__do_spmv_general(transA,alphap,mtxAp,Xp,incX,betap,Yp,incY,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS);
753 return errval;
754 }
755 #endif
756
757 /* We may use custom memcpy functions. */
758 #define RSB_MEMCPY(DST,SRC,BYTES) rsb__memcpy((DST),(SRC),(BYTES))
759
760 #define RSB_A_BZERO(ID,DOFF,NNZ,ES) \
761 RSB_BZERO(((rsb_byte_t*)(ID))+(ES)*(DOFF),(ES)*(NNZ)) \
762
763 #define RSB_A_MEMCPY(ID,IS,DOFF,SOFF,NNZ,ES) \
764 RSB_MEMCPY(((rsb_char_t*)(ID))+(ES)*(DOFF),((const rsb_char_t*)(IS))+(ES)*(SOFF),(ES)*(NNZ)) \
765
766 #define RSB_A_MEMMOVE(ID,IS,DOFF,SOFF,NNZ,ES) \
767 RSB_MEMMOVE(((rsb_char_t*)(ID))+(ES)*(DOFF),((const rsb_char_t*)(IS))+(ES)*(SOFF),(ES)*(NNZ)) \
768
769 #define RSB_COA_MEMCPY(ID,IS,DOFF,SOFF,NNZ) \
770 RSB_MEMCPY(((rsb_coo_idx_t*)(ID))+(DOFF),((const rsb_coo_idx_t*)(IS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ)) \
771
772 #define RSB_COA_MEMCPY2H(ID,IS,DOFF,SOFF,NNZ,ADD) \
773 { \
774 rsb_nnz_idx_t RSB_DUMMY_ID=0; \
775 for(RSB_DUMMY_ID=0;RSB_DUMMY_ID<(NNZ);++RSB_DUMMY_ID) \
776 ((rsb_half_idx_t*)(ID))[(DOFF)+(RSB_DUMMY_ID)]= \
777 ((const rsb_coo_idx_t*)IS)[(SOFF)+(RSB_DUMMY_ID)]+(ADD); \
778 }
779
780 #define RSB_COA_MEMMOVE(ID,IS,DOFF,SOFF,NNZ) \
781 RSB_MEMMOVE(((rsb_coo_idx_t*)(ID))+(DOFF),((const rsb_coo_idx_t*)(IS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ)) \
782
783 #define RSB_COO_MEMMOVE(VD,ID,JD,VS,IS,JS,DOFF,SOFF,NNZ,ES) \
784 RSB_MEMMOVE(((rsb_char_t*)(VD))+(ES)*(DOFF),((const rsb_char_t*)(VS))+(ES)*(SOFF),(ES)*(NNZ)), \
785 RSB_MEMMOVE(((rsb_coo_idx_t*)(ID))+(DOFF),((const rsb_coo_idx_t*)(IS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ)), \
786 RSB_MEMMOVE(((rsb_coo_idx_t*)(JD))+(DOFF),((const rsb_coo_idx_t*)(JS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ))
787
788 #define RSB_COO_MEMCPY(VD,ID,JD,VS,IS,JS,DOFF,SOFF,NNZ,ES) \
789 RSB_MEMCPY(((rsb_char_t*)(VD))+(ES)*(DOFF),((const rsb_char_t*)(VS))+(ES)*(SOFF),(ES)*(NNZ)), \
790 RSB_MEMCPY(((rsb_coo_idx_t*)(ID))+(DOFF),((const rsb_coo_idx_t*)(IS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ)), \
791 RSB_MEMCPY(((rsb_coo_idx_t*)(JD))+(DOFF),((const rsb_coo_idx_t*)(JS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ))
792
793 #define RSB_CSR_MEMCPY(VD,ID,JD,VS,IS,JS,NNZ,NR,ES) \
794 RSB_MEMCPY(((rsb_char_t *)(VD)),((const rsb_char_t *)(VS)),(ES)*(NNZ)), \
795 RSB_MEMCPY(((rsb_nnz_idx_t*)(ID)),((const rsb_nnz_idx_t*)(IS)),sizeof(rsb_nnz_idx_t)*(NR)), \
796 RSB_MEMCPY(((rsb_coo_idx_t*)(JD)),((const rsb_coo_idx_t*)(JS)),sizeof(rsb_coo_idx_t)*(NNZ))
797
798 #define RSB_CSR2COO_MEMCPY(VD,ID,JD,VS,I,JS,DOFF,SOFF,NNZ,ES) \
799 RSB_MEMCPY(((rsb_char_t*)(VD))+(ES)*(DOFF),((const rsb_char_t*)(VS))+(ES)*(SOFF),(ES)*(NNZ)), \
800 rsb__util_coo_array_set(((rsb_coo_idx_t*)(ID))+(DOFF),(NNZ),(I)), \
801 RSB_MEMCPY(((rsb_coo_idx_t*)(JD))+(DOFF),((const rsb_coo_idx_t*)(JS))+(SOFF),sizeof(rsb_coo_idx_t)*(NNZ))
802
803 #define RSB_COO_MEMCPY_parallel(VD,ID,JD,VS,IS,JS,DOFF,SOFF,NNZ,ES) \
804 RSB_A_MEMCPY_parallel(VD,VS,DOFF,SOFF,NNZ,ES), \
805 RSB_COA_MEMCPY_parallel(ID,IS,DOFF,SOFF,NNZ), \
806 RSB_COA_MEMCPY_parallel(JD,JS,DOFF,SOFF,NNZ)
807
808 #define RSB_FCOO_ASUM(S,X,LI,UI) {rsb_coo_idx_t i; for(i=(LI);i<(UI);++i)(S)+=(X)[i];}
809 #define RSB_XCOO_ISET(X, LI,UI) {rsb_coo_idx_t i; for(i=(LI);i<(UI);++i)(X)[i]=i-(LI);}
810 #define RSB_FCOO_ISET RSB_XCOO_ISET
811 #define RSB_XCOO_VSET(X,V,LI,UI) {rsb_coo_idx_t i; for(i=(LI);RSB_LIKELY((i)<(UI));++i)(X)[(i)] =(V);}
812 #define RSB_XCOO_VADD(X,V,LI,UI) {rsb_coo_idx_t i; for(i=(LI);RSB_LIKELY((i)<(UI));++i)(X)[(i)]+=(V);}
813 #define RSB_XCOO_IREN /* TODO: to write one */
814
815 #define RSB_NNZ_OF(MTXAP) ((MTXAP)?((MTXAP)->nnz):0)
816 #define RSB_TYPED_OFF_PTR(TYPECODE,VA,OFF) (((rsb_byte_t*)(VA))+(((size_t)(RSB_SIZEOF(TYPECODE))*(OFF))))
817 #define RSB_COO_LT(I1,J1,I2,J2) ( (I1) < (I2) || ( (I1) == (I2) && ( (J1) < (J2) ) ) )
818 #define RSB_COO_GT(I1,J1,I2,J2) RSB_COO_LT(I2,J2,I1,J1)
819 #define RSB_COO_GE(I1,J1,I2,J2) ( !RSB_COO_LT(I1,J1,I2,J2) )
820 #define RSB_COO_EQ(I1,J1,I2,J2) ( (I1) == (I2) && ( (J1) == (J2) ) )
821
822 /*!
823 * \ingroup gr_internals
824 * \brief An internal, helper structure.
825 */
826 struct rsb_memory_level_t
827 {
828 size_t size; /* */
829 size_t level; /* */
830 size_t associativity; /* */
831 size_t linesize; /* */
832 };
833
834 #define RSB_MEGABYTE (1024*1024)
835 #define RSB_MEGABYTE_SYM "MiB"
836
837 #define RSB_DEFAULT_STREAM stdout
838 #define RSB_DIR_SEPARATOR '/' /* */
839 #define RSB_MAX_STRERRLEN 128 /* */
840 #define RSB_MAX_LINE_LENGTH 1025 /* */
841 #define RSB_MAX_COMPILE_COMMAND_LENGTH 1025 /* */
842 #define RSB_MAX_VERSION_STRING_LENGTH 4096 /* */
843 #define RSB_MAX_FILENAME_LENGTH RSB_MAX_LINE_LENGTH /* the maximal supported file name length (in buffers) */
844
845 #define RSB_MAX_SUPPORTED_CACHE_LEVELS 32L /* the maximal supported height of memory hierarchy */
846 #define RSB_MIN_THREAD_MEMCPY_NNZ 1024 /* minimal count of nonzeros to move for a thread during parallel memcpy */
847 #define RSB_MIN_THREAD_BZERO_BYTES 8192 /* minimal count of nonzeros to bzero for a thread during parallel bzero */
848 #define RSB_MIN_THREAD_XAXPY_NNZ 256 /* 1024 */ /* minimal count of elements for a parallel vector-vector operation */
849 #define RSB_MIN_THREAD_SORT_NNZ 256 /* minimal count of nonzeros to sort for a thread during parallel sort */
850
851 #define RSB_POWER_OF_2(N) (1<<(N))
852 #define RSB_FRAC(Q,D) (((Q)+((D)-1))/(D))
853 #define RSB_MIDDLE(X) RSB_FRAC(X,2)
854 #define RSB_IS_INTEGER_ODD(X) ( (X)&0x01)
855 #define RSB_IS_INTEGER_EVEN(X) (!RSB_IS_INTEGER_ODD(X))
856
857 #define RSB_HAVE_STREAMS RSB_HAVE_STDIO_H
858
859 #if defined(RSB_WANT_LIBRSB_STATS) && (RSB_WANT_LIBRSB_STATS> 0)
860 #define RSB_WANT_LIBRSB_TIMER 1
861 #else
862 #define RSB_WANT_LIBRSB_TIMER 0
863 #endif
864
865
866 /*!
867 * \ingroup gr_internals
868 * \brief An internal, helper structure.
869 */
870 struct rsb_session_handle_t
871 {
872 #ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
873 /*!
874 * A global memory counter, used for debugging purposes.
875 * */
876 size_t allocated_memory; /* total of allocated memory, in bytes */
877 size_t allocations_count; /* total number of current allocations */
878 #endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
879 size_t min_leaf_matrix_bytes; /* */
880 size_t avg_leaf_matrix_bytes; /* */
881 size_t rsb_g_threads; /* detected threads */
882 #if RSB_WANT_PERFORMANCE_FILE
883 /*rsb_byte_t * performance_binary_dump_file;*/ /* TODO: obsolete feature */
884 rsb_char_t * performance_binary_dump_file; /* TODO: obsolete feature */
885 #endif /* RSB_WANT_PERFORMANCE_FILE */
886 /* beginning of user settable variables declarations */
887 size_t rsb_want_threads; /* RSB_IO_WANT_EXECUTING_THREADS ; active threads (may be <> rsb_g_threads) */
888 rsb_int_t asm_sort_method; /* RSB_IO_WANT_SORT_METHOD */
889 rsb_real_t subdivision_multiplier; /* RSB_IO_WANT_SUBDIVISION_MULTIPLIER */
890 rsb_int_t want_bounded_box; /* RSB_IO_WANT_BOUNDED_BOX_COMPUTATION */
891 rsb_int_t cache_blocking_method; /* RSB_IO_WANT_CACHE_BLOCKING_METHOD */
892 rsb_int_t want_outer_spmm; /* RSB_IO_WANT_LEAF_LEVEL_MULTIVEC */
893 #if RSB_HAVE_STREAMS
894 FILE * out_stream; /* RSB_IO_WANT_OUTPUT_STREAM */
895 FILE * error_stream; /* RSB_IO_WANT_VERBOSE_ERRORS */
896 FILE * init_stream; /* RSB_IO_WANT_VERBOSE_INIT */
897 FILE * exit_stream; /* RSB_IO_WANT_VERBOSE_EXIT */
898 #endif /* RSB_HAVE_STREAMS */
899 const rsb_char_t * mhis; /* RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING ; set via rsb_lib_reinit */
900 #if RSB_WANT_DEBUG_VERBOSE_INTERFACE_NOTICE
901 rsb_int_t rsb_g_verbose_interface; /* RSB_IO_WANT_EXTRA_VERBOSE_INTERFACE */
902 #endif /* RSB_WANT_DEBUG_VERBOSE_INTERFACE_NOTICE */
903 /* end of user settable variables declarations */
904 long memory_hierarchy_levels; /* */
905 struct rsb_memory_level_t caches[RSB_MAX_SUPPORTED_CACHE_LEVELS]; /* 0,..,memory_hierarchy_levels-1*/
906 rsb_bool_t rsb_g_initialized; /* */
907 #if RSB_WANT_ALLOCATOR_LIMITS
908 size_t memory_count_max; /* */
909 size_t allocations_count_max; /* */
910 #endif /* RSB_WANT_ALLOCATOR_LIMITS */
911 #if RSB_WANT_LIBRSB_TIMER
912 rsb_time_t etime;
913 #endif /* RSB_WANT_LIBRSB_TIMER */
914 rsb_int_t verbose_tuning; /* */
915 };
916
917 #define RSB_INTERNALS_COMMON_HEAD_DECLS extern struct rsb_session_handle_t rsb_global_session_handle;
918 #define RSB_DO_ERROR_CUMULATE(ERRVAL,ERRFLAG) RSB_DO_FLAG_ADD((ERRVAL),(ERRFLAG))
919
920 #define RSB_IF_NOT_NULL_CAST_TO(P,TYPE,FALLBACK) ((P)?*(TYPE*)(P):(FALLBACK))
921 #define RSB_IF_NOT_NULL_SET_TO_CASTED(V,P,TYPE) {if((P)!=NULL){(V)=*(TYPE*)(P);}}
922 #define RSB_IF_NOT_NULL_GET_TO_CASTED(V,P,TYPE) {if((P)!=NULL){*(TYPE*)(P)=(V);}}
923 #define RSB_IF_NOT_NULL_GET_SET_TO_CASTED(V,P,TYPE,F,ERRVAL) { \
924 switch(F){ \
925 case(RSB_IO_SPECIFIER_GET): \
926 RSB_IF_NOT_NULL_GET_TO_CASTED((V),(P),TYPE);break; \
927 case(RSB_IO_SPECIFIER_SET): \
928 RSB_IF_NOT_NULL_SET_TO_CASTED((V),(P),TYPE);break; \
929 default: RSB_DO_ERROR_CUMULATE(ERRVAL,RSB_ERR_BADARGS); }}
930 #define rsb__sprintf sprintf
931
932 #define RSB_BLAS_ERROR -1 /* */
933 #define RSB_BLAS_NO_ERROR 0 /* */
934 #define RSB_BLAS_ERROR_UNSUPPORTED RSB_BLAS_ERROR /* TODO: spread usage of this throughout the code */
935 #define RSB_BLAS_ERROR_UNIMPLEMENTED RSB_BLAS_ERROR /* TODO: spread usage of this throughout the code */
936 #define RSB_BLAS_ERROR_WRONG_USGP_ARG RSB_BLAS_ERROR /* TODO: spread usage of this throughout the code */
937
938 #define RSB_SET_IF_NOT_NULL(P,V) if((P)!=NULL)*(P)=V
939 typedef int rsb_blas_int_t;
940 typedef double rsb_aligned_t; /* see RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE and RSB_CONST_ENOUGH_BYTES_FOR_ANY_TYPE */
941
942
943 #define RSB_MASK_OUT_SOME_ERRORS(ERRVAL) {if((ERRVAL)==RSB_ERR_UNSUPPORTED_FEATURE)(ERRVAL)=RSB_ERR_NO_ERROR;}/* NOTE; this is a macro only used to prevent the test suite to complain for failing printouts when output is disabled! */
944
945 /*!
946 Macros to get indices types liminal values, configuration-dependent.
947 */
948 #define RSB_COO_HALF_BITS_SIZE ((sizeof(rsb_coo_idx_t)*RSB_CHAR_BIT)/2)
949
950 #define RSB_NULL_STATEMENT_FOR_COMPILER_HAPPINESS {int ___foo=1;++___foo;}/* will avoid things like error: label at end of compound statement */
951
952 #if RSB_WANT_ZLIB_SUPPORT
953 #define RSB_FOPEN(X,Y) gzopen((X),(Y))
954 #define RSB_FCLOSE(X) gzclose(X)
955 #else /* RSB_WANT_ZLIB_SUPPORT */
956 #define RSB_FOPEN(X,Y) fopen((X),(Y))
957 #define RSB_FCLOSE(X) fclose(X)
958 #endif /* RSB_WANT_ZLIB_SUPPORT */
959
960 #define RSB_EMPTY_FILE_FILLER static int foo(void){return 0;}
961
962 #define RSB_DECLARE_COO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX,TYPE) \
963 TYPE *IA=(TYPE*)(MATRIX)->bpntr; \
964 TYPE *JA=(TYPE*)(MATRIX)->bindx;
965
966 #define RSB_DECLARE_COO_IARRAY_FROM_MATRIX(IA,MATRIX,TYPE) \
967 TYPE *IA=(TYPE*)(MATRIX)->bpntr;
968
969 #define RSB_DECLARE_COO_JARRAY_FROM_MATRIX(JA,MATRIX,TYPE) \
970 TYPE *JA=(TYPE*)(MATRIX)->bindx;
971
972 #define RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,PTYPE,ITYPE) \
973 PTYPE *PA=(PTYPE*)(MATRIX)->bpntr; \
974 ITYPE *JA=(ITYPE*)(MATRIX)->bindx;
975
976 #define RSB_DECLARE_CONST_HALFCSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX) \
977 RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,const rsb_nnz_idx_t,const rsb_half_idx_t)
978
979 #define RSB_DECLARE_HALFCSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX) \
980 RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,rsb_nnz_idx_t,rsb_half_idx_t)
981
982 #define RSB_DECLARE_CONST_FULLCSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX) \
983 RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,const rsb_nnz_idx_t,const rsb_coo_idx_t)
984
985 #define RSB_DECLARE_FULLCSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX) \
986 RSB_DECLARE_CSR_ARRAYS_FROM_MATRIX(PA,JA,MATRIX,rsb_nnz_idx_t,rsb_coo_idx_t)
987
988 #define RSB_DECLARE_CONST_HALFCOO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX) \
989 RSB_DECLARE_COO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX,const rsb_half_idx_t)
990
991 #define RSB_DECLARE_CONST_HALFCOO_IARRAY_FROM_MATRIX(IA,MATRIX) \
992 RSB_DECLARE_COO_IARRAY_FROM_MATRIX(IA,MATRIX,const rsb_half_idx_t)
993
994 #define RSB_DECLARE_CONST_HALFCOO_JARRAY_FROM_MATRIX(JA,MATRIX) \
995 RSB_DECLARE_COO_JARRAY_FROM_MATRIX(JA,MATRIX,const rsb_half_idx_t)
996
997 #define RSB_DECLARE_CONST_FULLCOO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX) \
998 RSB_DECLARE_COO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX,const rsb_coo_idx_t)
999
1000 #define RSB_DECLARE_CONST_FULLCOO_IARRAY_FROM_MATRIX(IA,MATRIX) \
1001 RSB_DECLARE_COO_IARRAY_FROM_MATRIX(IA,MATRIX,const rsb_coo_idx_t)
1002
1003 #define RSB_DECLARE_CONST_FULLCOO_JARRAY_FROM_MATRIX(JA,MATRIX) \
1004 RSB_DECLARE_COO_JARRAY_FROM_MATRIX(JA,MATRIX,const rsb_coo_idx_t)
1005
1006 #define RSB_DECLARE_FULLCOO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX) \
1007 RSB_DECLARE_COO_ARRAYS_FROM_MATRIX(IA,JA,MATRIX,rsb_coo_idx_t)
1008
1009 /*!
1010 * If the restrict keyword is supported, we use it in our declarations.
1011 * */
1012 /* #ifdef restrict */
1013 #ifdef RSB_restrict
1014 #define RSB_RESTRICT restrict
1015 #else /* RSB_restrict */
1016 #define RSB_RESTRICT
1017 #endif /* RSB_restrict */
1018
1019 #define RSB_VA_OFFSET_POINTER(VA,ES,OFF) ((rsb_byte_t*)(VA)+(size_t)(ES)*(OFF))
1020 #define RSB_VA_OFFSET_POINTER_CONST(VA,ES,OFF) ((const rsb_byte_t*)(VA)+(size_t)(ES)*(OFF))
1021 #define RSB_VA_MEMCMP(LVA,LOFF,RVA,ROFF,ES) \
1022 RSB_MEMCMP(RSB_VA_OFFSET_POINTER((LVA),(ES),(LOFF)),RSB_VA_OFFSET_POINTER((RVA),(ES),(ROFF)),(ES))
1023
1024 /*!
1025 * \brief Auxiliary structure for a coo-stored matrix (usually for temporary operations).
1026 * */
1027 struct rsb_coo_matrix_t{
1028 rsb_coo_idx_t * IA, * JA;/** row and columns indices */
1029 rsb_coo_idx_t nr,nc; /** matrix (declared) nonzeros */
1030 rsb_nnz_idx_t nnz; /** matrix rows, columns */
1031 void * VA; /** values of data elements */
1032 rsb_type_t typecode; /** as specified in the RSB_NUMERICAL_TYPE_* preprocessor symbols in rsb_types.h */
1033 };
1034
1035 #define RSB_INIT_COO_FROM_MTX(COOP,MTXAP) { \
1036 (COOP)->nr=(MTXAP)->nr; \
1037 (COOP)->nc=(MTXAP)->nc; \
1038 (COOP)->nnz=(MTXAP)->nnz; \
1039 (COOP)->typecode=(MTXAP)->typecode; }
1040
1041 #define RSB_INIT_CXX_FROM_MTX(COOP,MTXAP) { \
1042 (COOP)->nr=(MTXAP)->nr; \
1043 (COOP)->nc=(MTXAP)->nc; \
1044 (COOP)->nnz=RSB_MAX((MTXAP)->nnz,1+RSB_MAX((MTXAP)->nr,(MTXAP)->nc)); \
1045 (COOP)->typecode=(MTXAP)->typecode; }
1046
1047 #define RSB_BIND_COO_TO_MTX(COOP,MTXAP) { \
1048 (COOP)->VA=(MTXAP)->VA; \
1049 (COOP)->IA=(MTXAP)->bpntr; \
1050 (COOP)->JA=(MTXAP)->bindx; }
1051
1052 #define RSB_FLAG_SOME_SYMMETRY (RSB_FLAG_HERMITIAN|RSB_FLAG_SYMMETRIC)
1053 #define RSB_FLAG_ALL_STRUCTURAL_FLAGS (RSB_FLAG_SOME_SYMMETRY|RSB_FLAG_DIAGONAL|RSB_FLAG_TRIANGULAR|RSB_FLAG_UNIT_DIAG_IMPLICIT)
1054 #define RSB_FLAG_ALL_DUPLICATE_FLAGS (RSB_FLAG_DUPLICATES_KEEP_LAST|RSB_FLAG_DUPLICATES_SUM)
1055 #define RSB_DUMMY_ID rsb_dummy_id
1056 #define RSB_DUMMY_MTX (NULL)
1057 #define RSB_DEFAULT_TEST_MATRIX_FILENAME "pd.mtx" /**< this file should always be included in the library distribution (FIXME: should enforce this) */
1058
1059 #define RSB_VECTORS_DIFF_DISPLAY_N 10
1060 #define RSB_VECTORS_DIFF_DISPLAY_N_SMALL 3
1061 #define RSB_DEFAULT_UNDEFINED_COO_VALUE 0
1062
1063 #define RSB_PSORT_CHUNK 10000 /* FIXME: hardcoded constants are bad (and the PGI compiler won't accept them) */
1064 #define RSB_MINIMUM_VECOP_OMP_CHUNK 1000 /* FIXME: hardcoded constants are bad (and the PGI compiler won't accept them) */
1065
1066 #define RSB_BOOL_IS_POINTER_NON_NULL(P) ((P)?RSB_BOOL_TRUE:RSB_BOOL_FALSE)
1067
1068 #define RSB_CONDITIONAL_ERRPSET(ERRVALP,ERRVAL) {if(ERRVALP)(*(ERRVALP)=(ERRVAL));}
1069 #define RSB_MTX_FREE(MTXAP) if(MTXAP){rsb__do_mtx_free(MTXAP);(MTXAP)=NULL;} /* frees the matrix and nullifies the associated pointer. */
1070
1071 /* #define RSB_FLAGS_RSB_AGNOSTIC RSB_FLAG_FORTRAN_INDICES_INTERFACE */
1072 /* #define RSB_FLAGS_RSB_NON_AGNOSTIC (RSB_FLAG_USE_HALFWORD_INDICES|RSB_FLAG_WANT_COO_STORAGE|RSB_FLAG_WANT_CSR_STORAGE) --- see RSB_DO_FLAGS_EXTRACT_STORAGE(flags) */
1073 #define RSB_FLAGS_RSB_AGNOSTIC (RSB_FLAG_FORTRAN_INDICES_INTERFACE|RSB_FLAG_UNIT_DIAG_IMPLICIT|RSB_FLAG_UPPER|RSB_FLAG_LOWER|RSB_FLAG_SORTED_INPUT|RSB_FLAG_TRIANGULAR|RSB_FLAG_SYMMETRIC|RSB_FLAG_HERMITIAN)
1074
1075 #define RSB_INDEX_FIT_IN_HALFWORD(I) ((I)<=RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t))
1076 #define RSB_INDICES_FIT_IN_HALFWORD(I,J) ( RSB_INDEX_FIT_IN_HALFWORD(I) && RSB_INDEX_FIT_IN_HALFWORD(J) )
1077
1078 #define RSB_IF_NOFLAGS_SET_DEFAULT_MATRIX_FLAGS(V) \
1079 if(RSB_DO_FLAG_FILTEROUT((V),RSB_FLAGS_RSB_AGNOSTIC)==RSB_FLAG_NOFLAGS) \
1080 RSB_DO_FLAG_ADD((V),RSB_FLAG_DEFAULT_MATRIX_FLAGS);
1081
1082 #define RSB_DIVIDE_IN_CHUNKS(N,NTHREADS) RSB_MAX(((N)+(NTHREADS)-1)/(NTHREADS),1)
1083 #define RSB_EXIT exit
1084 #define RSB_DO_ERR_RETURN(ERRVAL) {return (ERRVAL);}
1085 #define RSB_DO_MTX_RETURN(MATRIX,ERRVAL) {return (MATRIX);}
1086 #define RSB_FLAG_UPPTRI (RSB_FLAG_UPPER|RSB_FLAG_LOWER)
1087 #define RSB_DO_FLAG_FLIP_UPLO(V) {\
1088 if(RSB_DO_FLAG_HAS((V),RSB_FLAG_UPPER)) \
1089 RSB_DO_FLAG_DEL((V),RSB_FLAG_UPPER),RSB_DO_FLAG_ADD((V),RSB_FLAG_LOWER); \
1090 else \
1091 if(RSB_DO_FLAG_HAS((V),RSB_FLAG_LOWER)) \
1092 RSB_DO_FLAG_ADD((V),RSB_FLAG_UPPER),RSB_DO_FLAG_DEL((V),RSB_FLAG_LOWER); \
1093 }
1094 #define RSB_PERR_GOTO(LABEL,...) {RSB_ERROR(__VA_ARGS__);goto LABEL;}
1095 #define RSB_SERR_GOTO(LABEL) {goto LABEL;}
1096
1097 #define RSB_SYMMETRY_STRING(FLAGS) (RSB_DO_FLAG_HAS(FLAGS,RSB_FLAG_HERMITIAN)?"hermitian":(RSB_DO_FLAG_HAS(FLAGS,RSB_FLAG_SYMMETRIC)?"symmetric":"general"))
1098
1099 typedef float rsb_float_t;
1100 #define RSB_FLOAT_ONE 1.0f
1101
1102 #define RSB_RECURSION_MIN_DIM (2)
1103 #define RSB_RECURSION_MIN_NNZ (4)
1104
1105 /*!
1106 An integer type for thread indices.
1107 */
1108 typedef int rsb_thread_t;
1109
1110 /*! \internal */
1111 typedef rsb_flags_t rsb_order_t;
1112
1113
1114
1115 /** \internal \todo:OBSOLETE, REMOVE */
1116 #define RSB_FLAG_OBSOLETE_BLOCK_ASYMMETRIC_Z_SORTING 0x008000
1117
1118 /** \internal \todo:obsolete: FIXME */
1119 /*#define RSB_FLAG_BLOCK_ASYMMETRIC_Z_SORTED 0x010000*/
1120 /** \internal \todo:temporary fix: FIXME */
1121 #define RSB_FLAG_FIX_FOR_BINARY_LOADED_MATRIX 0x010000
1122
1123 /** \internal \todo:EXPERIMENTAL */
1124 #define RSB_FLAG_EXPERIMENTAL_IN_PLACE_CSR 0x020000
1125
1126 /** if set, the matrix will be partitioned with a block size chosen automatically */
1127 #define RSB_FLAG_AUTO_BLOCKING 0x80000000 /* FIXME: obsolete, unsupported */
1128
1129
1130 /** if set, will decide between column or row major for each (leaf) matrix (NEW: EXPERIMENTAL) */
1131 /*#define RSB_FLAG_WANT_AUTO_MAJOR_ORDER 0x200000 */ /* Unsupported */
1132
1133
1134 #if 0
1135 /** if set, the matrix .. */
1136 #define RSB_FLAG_WANT_RECURSIVELY_NON_UNIFORM_AUTO_BLOCKING 0x000200 /* experimental, but works well */
1137 #endif /* 0 */
1138
1139 /** if set, the matrix will take possession of partitioning arrays p_r and p_c on input. if unset, a copy will be made */
1140 #define RSB_FLAG_OWN_PARTITIONING_ARRAYS 0x000080 /* */
1141
1142 /** if set, the blocks will be linked in some way */
1143 /* FIXME: delete this flag */
1144 /*#define RSB_FLAG_WANT_LINKED_STORAGE 0x000400*/
1145
1146 /** if set, operating routines will check input more aggressively (may break operation) */
1147 #define RSB_FLAG_SHOULD_DEBUG 0x000800
1148
1149 /** if set, the block partitioning will be fixed but VBR or LBR (Unsupported) */
1150 #define RSB_FLAG_WANT_FIXED_BLOCKING_VBR 0x001000
1151
1152 /** if set, will mark a leaf matrix */
1153 #define RSB_FLAG_NON_ROOT_MATRIX 0x100000
1154
1155 /** if set, the matrix will be prevent from being subdivided too much (OUTLAWED) */
1156 /*#define RSB_FLAG_EXPERIMENTAL_NO_MICRO_LEAVES 0x4000000*/
1157
1158 /**
1159 * if set, the blocks will cycle column after column.
1160 * if RSB_FLAG_WANT_BCSS_STORAGE is also set, the matrix storage format will be BCSC.
1161 * otherwise it will be VBC.
1162 * */
1163 /* see rsb.h*/
1164 /*#define RSB_FLAG_WANT_COLUMN_MAJOR_ORDER 0x4000000*/
1165
1166
1167 /** if set, the code will sort the input */
1168 #define RSB_FLAG_SORT_INPUT 0x2000000 /* FIXME: delete this flag */
1169
1170
1171 /** a parameter to determine if a matrix is really 'small' or not (FIXME) */
1172 #define RSB_EXPERIMENTAL_MIN_LEAF_ELEMENTS 1024
1173
1174 /*#define RSB_FLAG_RECURSIVE_SHRINK_BOUNDING_BOX 0x40000000*/
1175
1176 #if 0
1177 /* only flags left : */
1178 #define RSB_FLAG_ALLOW_PARALLEL_OPERATION 0x40000000 /* NEW : UNUSED */
1179 #endif /* 0 */
1180
1181 #if 0
1182 #define RSB_FLAG_DEFAULT (RSB_FLAG_DISCARD_ZEROS /*| RSB_FLAG_WANT_BCSS_STORAGE*/ /* | RSB_FLAG_SORT_INPUT*/)
1183 #endif /* 0 */
1184
1185
1186 /**
1187 * \brief It is an internal structure, so beware, you should not use it.
1188 * \internal
1189 *
1190 * This structure will be used for keeping information about matrix partitioning.
1191 * It should be used primarily during matrix building, when the matrix arrays are
1192 * not all allocated.
1193 *
1194 * It is an internal structure, so beware, you should not use it.
1195 * */
1196 struct rsb_mtx_partitioning_info_t
1197 {
1198 rsb_blk_idx_t M_b, K_b; /**< just as in rsb_mtx_t */
1199 rsb_blk_idx_t br, bc; /**< block row and column size (only if BCSR) (NEW) */
1200 rsb_coo_idx_t *rpntr,*cpntr; /**< just as in rsb_mtx_t */
1201
1202 rsb_coo_idx_t nr,nc; /**< just as in rsb_mtx_t */
1203 rsb_submatrix_idx_t should_subdivide_levels; /**< for recursive partitioning (EXPERIMENTAL) */
1204 };
1205
1206
1207 typedef signed long rsb_long_t; /* FIXME :internals, (still) unused */
1208
1209 #define RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT__VAL 0x10
1210 #define RSB_OP_FLAG_WANT_SERIAL__VAL 0x2
1211 #define RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL_VAL (RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT__VAL+RSB_OP_FLAG_WANT_SERIAL__VAL)
1212 /*!
1213 * \ingroup gr_internals
1214 * \brief An internal, helper enumeration.
1215 * \internal
1216 */
1217 enum rsb_op_flags_t { RSB_OP_FLAG_DEFAULT=0x1, /* normal operation */
1218 RSB_OP_FLAG_WANT_SERIAL=RSB_OP_FLAG_WANT_SERIAL__VAL,
1219 RSB_OP_FLAG_MAY_PARALLEL=0x4,
1220 RSB_OP_FLAG_INFINITE_PARALLELISM_EMULATE=0x5, /* will process only diagonal blocks */
1221 RSB_OP_FLAG_FAKE_LOCK=0x6, /* will perform operations with no locking (thus giving incorrect results) just to determine lock overhead */
1222 RSB_OP_FLAG_WANT_PARALLEL_SORT=0x7,
1223 RSB_OP_FLAG_WANT_SERIAL_SORT=0x8,
1224 RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT=RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT__VAL,
1225 RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL=RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL_VAL,
1226 RSB_OP_FLAG_WANT_TRACE_PLOT=0x10
1227 };
1228
1229 #define RSB_BLOCK_ROWMAJOR_ADDRESS(P,LDP,NR,NC,R,C,ES) \
1230 ((rsb_char_t*)P)+((size_t)(ES))*((LDP)*(R)+(C))
1231 #define RSB_BLOCK_COLMAJOR_ADDRESS(P,LDP,NR,NC,R,C,ES) \
1232 ((rsb_char_t*)P)+((size_t)(ES))*((LDP)*(C)+(R))
1233
1234 #define RSB_BLOCK_X_MAJOR_REFERENCE(A,LDP,R,C,ONEIFISCOLMAJOR) \
1235 A[(ONEIFISCOLMAJOR)?((LDP)*(C)+(R)):((LDP)*(R)+(C))]
1236
1237 #define RSB_SOME_ERROR(ERRVAL) ((ERRVAL)!=RSB_ERR_NO_ERROR)
1238
1239 #define RSB_USE_OMP_SET_NUM_THREADS 0
1240
1241 #if RSB_USE_OMP_SET_NUM_THREADS
1242 #define rsb_set_num_threads(RNT) omp_set_num_threads(RNT)
1243 #define rsb_get_num_threads() omp_get_num_threads()
1244 #else
1245 #define rsb_set_num_threads(RNT) rsb__set_num_threads(RNT)
1246 #define rsb_get_num_threads() rsb__set_num_threads(RSB_THREADS_GET)
1247 #endif
1248 #define RSB_DO_THREADS_PUSH(RNT) {if((RNT)>0)rsb_set_num_threads(RNT); /* push */}
1249 #define RSB_DO_THREADS_POP(RNT,ORNT) {if((RNT)>0)rsb_set_num_threads(ORNT); /* pop */}
1250
1251 #if defined(RSB_WANT_RSB_NUM_THREADS) && (RSB_WANT_RSB_NUM_THREADS>0)
1252 #define RSB_NUM_THREADS_DECL const char * rnt_str = getenv("RSB_NUM_THREADS"); rsb_int_t ornt = rsb_get_num_threads(), rnt = (rnt_str? rsb__util_atoi(rnt_str) :0);
1253 #define RSB_NUM_THREADS_PUSH {RSB_DO_THREADS_PUSH(rnt); /* push */}
1254 #define RSB_NUM_THREADS_POP {RSB_DO_THREADS_POP(rnt,ornt); /* pop */}
1255 #else /* defined(RSB_WANT_RSB_NUM_THREADS) && (RSB_WANT_RSB_NUM_THREADS>0) */
1256 #define RSB_NUM_THREADS_DECL
1257 #define RSB_NUM_THREADS_PUSH
1258 #define RSB_NUM_THREADS_POP
1259 #endif /* defined(RSB_WANT_RSB_NUM_THREADS) && (RSB_WANT_RSB_NUM_THREADS>0) */
1260
1261 #if defined(RSB_BLAS_WANT_EXPERIMENTAL_TUNING)
1262 #define RSB_SPB_THREADS_PUSH {RSB_DO_THREADS_PUSH(rnt); /* push */}
1263 #define RSB_SPB_THREADS_POP {RSB_DO_THREADS_PUSH(rnt,ornt); /* pop */}
1264 #else /* defined(RSB_BLAS_WANT_EXPERIMENTAL_TUNING) */
1265 #define RSB_SPB_THREADS_PUSH
1266 #define RSB_SPB_THREADS_POP
1267 #endif /* defined(RSB_BLAS_WANT_EXPERIMENTAL_TUNING) */
1268
1269 #define RSB_SPB_THREADS_DEFAULT 0
1270 #define RSB_SPB_THREADS_AUTO -1
1271 #define RSB_SPB_THR_STR_AUTO -2
1272 #define RSB_SPB_THR_STR_AUTO_NEXTOP -3 /* TODO: need to diversify in thr.-only vs str.+thr. tuning */
1273
1274 /* #define RSB_PRINT_THREAD_STATS RSB_STDOUT("rsb_want_threads / rsb_g_threads / omp_get_max_threads / omp_get_num_threads / omp_get_thread_limit: %d / %d / %d / %d / %d\n",rsb_global_session_handle.rsb_want_threads,rsb_global_session_handle.rsb_g_threads,omp_get_max_threads(),omp_get_num_threads(),omp_get_thread_limit()); */
1275 #define RSB_PRINT_THREAD_STATS RSB_STDOUT("rsb_want_threads / rsb_g_threads / omp_get_max_threads / omp_get_num_threads / omp_get_thread_limit: %d / %d / %d / %d\n",(int)rsb_global_session_handle.rsb_want_threads,(int)rsb_global_session_handle.rsb_g_threads,(int)omp_get_max_threads(),(int)omp_get_num_threads());
1276
1277 #define RSB_ERRMSG_NOSTREAMS "streams usage configured out."
1278 #define RSB_ERRMSG_BADFORMAT "submatrix format unrecognized."
1279 #define RSB_ERRMSG_NOTMTXMKT "not a Matrix Market format matrix"
1280 #define RSB_ERRMSG_FILEOPENP "problems opening"
1281 #define RSB_ERRMSG_PROIFAMM "problems reading or interpreting file as Matrix Market"
1282 #define RSB_ERRMSG_FILEOPENPGZ "problems opening gzipped"
1283 #define RSB_ERRMSG_TMXMKTBANNER "Could not process Matrix Market banner"
1284 #define RSB_ERRMSG_BADCOO "bad input coo elements"
1285 #define RSB_INFOMSG_SAK "is a swiss army knife for testing the library functionality and performance"
1286
1287 #define RSB_WANT_COO_BEGIN 1
1288
1289 #if RSB_WANT_COO_BEGIN
1290 #define RSB_MTX_HBDF(MTXAP) ((MTXAP)->RSB_MTX_BMF==RSB_MTX_BMV)
1291 #define RSB_MTX_HBDFH(MTXAP) ((MTXAP)->RSB_MTX_BDF)
1292 #define RSB_MTX_BDF nnz
1293 #define RSB_MTX_BMF nr
1294 #define RSB_MTX_BMV -1
1295 #endif /* RSB_WANT_COO_BEGIN */
1296
1297 #define RSB_STDOUT_MATRIX_ESSENTIALS(M,MN,TN) RSB_STDOUT("%s\t%c\t%c\t%d\t%d\t%d\t%d",(const rsb_char_t*)rsb__basename(MN),rsb__do_get_symmetry_char(M),RSB_TRANSPOSITION_AS_CHAR(transA),TN,(M)->nr,(M)->nc,(M)->nnz)
1298 #define RSB_FPRINTF_MATRIX_ESSENTIALS(FD,M,MN,TN) RSB_FPRINTF(FD,"%s\t%c\t%c\t%d\t%d\t%d\t%d",(const rsb_char_t*)rsb__basename(MN),rsb__do_get_symmetry_char(M),RSB_TRANSPOSITION_AS_CHAR(transA),TN,(M)->nr,(M)->nc,(M)->nnz)
1299 #define RSB_FPINV(FPV) (1.0/(FPV))
1300 #define RSB_MILLION_I 1000000
1301 #define RSB_MILLION_F 1000000.0
1302 #define RSB_CLEARTERM_STRING "\x1B\x4D"
1303 /*#define RSB_MAX_SHORTIDX_MATRIX_DIM (RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t)-RSB_NNZ_BLK_MAX)*/
1304 #define RSB_MAX_SHORTIDX_MATRIX_DIM (RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t))
1305 #define RSB_BENCH_PROG_OPTS \
1306 {"nthreads", required_argument, NULL, 0x6E},/* n */
1307 #define RSB_MAX_ALLOCATABLE_MEMORY_CHUNK \
1308 ((size_t)((sizeof(void*)==sizeof(unsigned int))? RSB_MAX_VALUE_FOR_TYPE(unsigned int):RSB_MAX_VALUE_FOR_TYPE(size_t)))
1309 #define RSB_DOES_TRANSPOSE(TRANSA) ((TRANSA)!=RSB_TRANSPOSITION_N)
1310 #define RSB_DOES_NOT_TRANSPOSE(TRANSA) (!RSB_DOES_TRANSPOSE(TRANSA))
1311 #define RSB_DOES_CONJUGATE(TRANSA) ((TRANSA)==RSB_TRANSPOSITION_C)
1312 #define RSB_DOES_NOT_CONJUGATE(TRANSA) (!RSB_DOES_CONJUGATE(TRANSA))
1313 #define RSB_MTX_TRANSPOSED_ROWS(MTX,TRANSA) (RSB_DOES_TRANSPOSE((TRANSA))?(MTX)->nc:(MTX)->nr)
1314 #define RSB_MTX_TRANSPOSED_COLS(MTX,TRANSA) (RSB_DOES_TRANSPOSE((TRANSA))?(MTX)->nr:(MTX)->nc)
1315 #define RSB_MTX_DIAG_SIZE(MTX) RSB_MIN( (MTX)->nc,(MTX)->nr )
1316 #define RSB_MTX_DIAG_SIZE_BLK(MTX) RSB_MTX_DIAG_SIZE_BLK(MTX) + RSB_NNZ_BLK_MAX
1317
1318 #define RSB_ALLOW_ZERO_DIM RSB_MIN_MATRIX_DIM == 0
1319 #define RSB_ANY_MTX_DIM_ZERO(MTXAP) ((MTXAP) && (((MTXAP)->nr==0)||(MTXAP)->nc==0))
1320
1321 #if defined(RSB_WANT_OMP_RECURSIVE_KERNELS) && (RSB_WANT_OMP_RECURSIVE_KERNELS>0)
1322 #define RSB_NT rsb_global_session_handle.rsb_g_threads
1323 #define RSB_NTC num_threads(RSB_NT)
1324 #else
1325 #define RSB_NT
1326 #define RSB_NTC
1327 #endif
1328 #define RSB_STORE_IDXSA 1
1329
1330 #define RSB_ASSIGN_IF_SP(DSTV,SRCP) \
1331 if ( (SRCP) != NULL ) \
1332 (DSTV) = *(SRCP); /* FIXME: move this declaration elsewhere */
1333
1334 #define RSB_ASSIGN_IF_DP(DSTP,SRCV) \
1335 if ( (DSTP) != NULL ) \
1336 *(DSTP) = (SRCV); /* FIXME: move this declaration elsewhere */
1337
1338 #define RSB_ASSIGN_IF(DSTP,SRCV) RSB_ASSIGN_IF_DP(DSTP,SRCV)
1339
1340 #ifdef RSB_HAVE_ASSERT_H
1341 #ifdef RSB_USE_ASSERT
1342 /* ok, no extra action needed */
1343 #else /* RSB_USE_ASSERT */
1344 /* according to POSIX.1-2001, C89, C99, NDEBUG will cause assert to generate no code. */
1345 #define NDEBUG 1
1346 #endif /* RSB_USE_ASSERT */
1347 #include <assert.h> /* the assert() macro */
1348 #endif /* RSB_HAVE_ASSERT_H */
1349
1350 #include "rsb.h" /* public API specification */
1351 #include "rsb_init.h" /* initialization functions */
1352 #include "rsb_rec.h" /* recursion handling functions */
1353 #include "rsb_permute.h" /* permutation functions */
1354 #include "rsb_srt.h" /* sorting functions */
1355 #include "rsb_mergesort.h" /* sorting functions */
1356 #include "rsb_merge.h" /* merging functions */
1357 #include "rsb_srtp.h" /* parallel sorting functions */
1358 #include "rsb_prec.h" /* toy preconditioning */
1359 #include "rsb_msort_up.h" /* sorting functions, adapted from PSBLAS */
1360 #include "rsb_unroll.h" /* computational kernels */
1361 #include "rsb_is.h" /* coordinate handling functions */
1362 #include "rsb_src.h" /* search functions */
1363 #include "rsb_clone.h" /* clone functions */
1364 #include "rsb_err.h" /* error handling functions */
1365 #include "rsb_internals.h" /* */
1366 #include "rsb_do.h" /* */
1367 #include "rsb_mio.h" /* I/O functions */
1368 #include "rsb_get.h" /* matrix getter functions */
1369 #include "rsb_set.h" /* matrix setter functions */
1370 #include "rsb_dump.h" /* matrix dumping functions */
1371 #include "rsb_coo.h" /* coordinate handling functions */
1372 #include "rsb_csr.h" /* csr handling functions */
1373 #include "rsb_blas_stuff.h" /* BLAS like stuff */
1374 #include "rsb_op.h" /* */
1375 #include "rsb_bio.h" /* Binary Matrix I/O */
1376 #include "rsb_asm.h" /* Matrix assembly functions */
1377 #include "rsb_coo_check.h" /* */
1378 #include "rsb_coo_symm.h" /* */
1379 #include "rsb_idx.h" /* index manipulation */
1380 /* #include "rsb_ftn.h"*/ /* fortran interface functions (obsolete) */
1381 #include "rsb_libspblas_handle.h" /* */
1382 #include "rsb_render.h" /* matrix as pixmap rendering functions */
1383 #include "rsb_eps.h" /* matrix as (encapsulated) postscript rendering functions */
1384 #include "rsb_gen.h" /* matrix generating functions */
1385 #include "rsb_sys.h" /* system related functions */
1386 #include "rsb_mbw.h" /* memory benchmark related functions */
1387 #include "rsb_limiter.h" /* */
1388 #include "rsb_fpb.h" /* floating point benchmark related functions */
1389 #include "rsb_garbage.h" /* misc helpers routines */
1390 #include "rsb_pcnt.h" /* performance counters code */
1391 #include "rsb_perf.h" /* performance info gathering code */
1392 #include "rsb_pr.h" /* performance reporting */
1393 #include "rsb_util.h" /* sorting and computational stuff */
1394 #include "rsb_spmv.h" /* sparse matrix-vector multiplication */
1395 #include "rsb_swt.h" /* switching format functions */
1396 #include "rsb_lock.h" /* */
1397 #include "rsb_partition.h" /* custom partitioning stuff (OBSOLETE) */
1398 #include "rsb_krnl.h" /* kernels rsb_krnlers */
1399 #include "rsb_krnl_vb.h" /* vb specific functions */
1400 /* #include "libspblas_tests.h" */ /* */
1401 #include "rsb_test_accuracy.h" /* accuracy testing functions */
1402 #include "rsb_krnl_bcss.h" /* bcss specific functions */
1403 #include "rsb_krnl_bcoo_spmv_u.h" /* bcoo specific functions */
1404 #include "rsb_bench.h" /* performance info gathering code (OBSOLETE) */
1405 #include "rsb_spgemm.h" /* sparse matrices multiplication */
1406 #include "rsb_spgemm_csr.h" /* sparse matrices multiplication */
1407 #include "rsb_spsum_misc.h" /* sum of Sparse Matrices */
1408 #include "rsb_spsum.h" /* Sum of Sparse Matrices */
1409 #include "rsb_spsv.h" /* */
1410 #include "rsb_lbl.h" /* OBSOLETE */
1411 /* #include "rsb_experiments.h" */ /* experiments (obsolete) */
1412 #include "rsb_coo2rec.h" /* */
1413 #include "rsb_rec2coo.h" /* */
1414 #include "rsb_rec2csr.h" /* */
1415 #include "rsb_csr2coo.h" /* */
1416 #include "rsb_cpmv.h" /* */
1417 #include "rsb_tune.h" /* */
1418
1419 #ifdef __cplusplus
1420 }
1421 #endif /* __cplusplus */
1422
1423 #endif /* RSB_COMMON_H_INCLUDED */
1424 /* @endcond */
1425