1 /* @cond INNERDOC */
2 /*!
3  * @file
4  * @author Michele Martone
5  * @brief
6  * This source file contains some MKL interfacing functions.
7  * */
8 
9 
10 /*
11 
12 Copyright (C) 2008-2020 Michele Martone
13 
14 This file is part of librsb.
15 
16 librsb is free software; you can redistribute it and/or modify it
17 under the terms of the GNU Lesser General Public License as published
18 by the Free Software Foundation; either version 3 of the License, or
19 (at your option) any later version.
20 
21 librsb is distributed in the hope that it will be useful, but WITHOUT
22 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
24 License for more details.
25 
26 You should have received a copy of the GNU Lesser General Public
27 License along with librsb; see the file COPYING.
28 If not, see <http://www.gnu.org/licenses/>.
29 
30 */
31 /*
32  The code in this file was generated automatically by an M4 script.
33  It is not meant to be used as an API (Application Programming Interface).
34  p.s.: right now, only row major matrix access is considered.
35 
36  */
37 #include "rsb_mkl.h"
38 #if RSB_WANT_MKL
39 
40 
41 
rsb__mkl_gemv(rsb_type_t typecode,const void * Mp,const void * Bp,void * Xp,rsb_nnz_idx_t mdim,rsb_coo_idx_t vdim,rsb_coo_idx_t * udimp)42 rsb_err_t rsb__mkl_gemv(rsb_type_t typecode, const void * Mp, const void*Bp, void*Xp, rsb_nnz_idx_t mdim, rsb_coo_idx_t vdim, rsb_coo_idx_t*udimp)
43 {
44 	/* FIXME: TODO: incX != 1 */
45 	rsb_err_t errval = RSB_ERR_NO_ERROR;
46 	const MKL_INT dim=(rsb_coo_idx_t)sqrt((double)mdim);
47 	const MKL_INT incX=1;
48 	char transA_mkl=110;
49 	if(!Mp || !Xp || !Bp)
50 		goto err;
51 	if(dim<1 || dim>vdim)
52 		goto err;
53 #ifdef RSB_NUMERICAL_TYPE_FLOAT
54 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
55 	{
56 		const float alpha=((float)(1.0)), beta=((float)(1.0));
57 		sgemv(&transA_mkl,&dim,&dim, (float*)(&alpha),(const float*)Mp,&dim,(const float*)Bp,&incX,(float*)&beta,(float*)Xp,&incX);
58 	}
59 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
60 	else
61 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
62 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
63 	{
64 		const double alpha=((double)(1.0)), beta=((double)(1.0));
65 		dgemv(&transA_mkl,&dim,&dim, (double*)(&alpha),(const double*)Mp,&dim,(const double*)Bp,&incX,(double*)&beta,(double*)Xp,&incX);
66 	}
67 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
68 	else
69 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
70 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
71 	{
72 		const float complex alpha=((float complex)(1.0)), beta=((float complex)(1.0));
73 		cgemv(&transA_mkl,&dim,&dim, (MKL_Complex8*)(&alpha),(const MKL_Complex8*)Mp,&dim,(const MKL_Complex8*)Bp,&incX,(MKL_Complex8*)&beta,(MKL_Complex8*)Xp,&incX);
74 	}
75 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
76 	else
77 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
78 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
79 	{
80 		const double complex alpha=((double complex)(1.0)), beta=((double complex)(1.0));
81 		zgemv(&transA_mkl,&dim,&dim, (MKL_Complex16*)(&alpha),(const MKL_Complex16*)Mp,&dim,(const MKL_Complex16*)Bp,&incX,(MKL_Complex16*)&beta,(MKL_Complex16*)Xp,&incX);
82 	}
83 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
84 	else
85 		errval=RSB_ERR_BADARGS;
86 
87 	if(udimp)
88 		*udimp=dim;
89 err:
90 	return errval;
91 }
92 
rsb_rsb_to_mkl_trans(rsb_trans_t transA_mkl)93 static char rsb_rsb_to_mkl_trans(rsb_trans_t transA_mkl)
94 {
95 	/**
96 	 * \ingroup gr_internals
97 	 */
98 	switch(transA_mkl)
99 	{
100 		case(RSB_TRANSPOSITION_N):
101 		return 'n';
102 		break;
103 		case(RSB_TRANSPOSITION_T):
104 		return 't';
105 		break;
106 		case(RSB_TRANSPOSITION_C):
107 		return 'c';
108 		break;
109 		default:
110 		return 'n';	// FIXME
111 	}
112 }
113 
rsb_rsb_to_mkl_sym(rsb_flags_t flags)114 static char rsb_rsb_to_mkl_sym(rsb_flags_t flags)
115 {
116 	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_SYMMETRIC))
117 		return 's';
118 	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_TRIANGULAR))
119 		return 't';
120 	else
121 		return 'g';
122 }
123 
rsb_rsb_to_mkl_upl(rsb_flags_t flags)124 static char rsb_rsb_to_mkl_upl(rsb_flags_t flags)
125 {
126 	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_LOWER))
127 		return 'l';
128 	else
129 		return 'u';
130 }
131 
rsb__mkl_coo_spmv(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)132 rsb_err_t rsb__mkl_coo_spmv(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags)
133 {
134 	char transA_mkl = rsb_rsb_to_mkl_trans(transA);
135 	char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
136 	matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
137 	matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
138 	matdescra[2] = 'n'; // not unit diagonal
139 	matdescra[3] = 'c'; // zero based indexing
140 
141 #ifdef RSB_NUMERICAL_TYPE_FLOAT
142 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
143 	mkl_scoomv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(float*)x,(float*)betap,(float*)y);
144 	else
145 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
146 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
147 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
148 	mkl_dcoomv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(double*)x,(double*)betap,(double*)y);
149 	else
150 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
151 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
152 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
153 	mkl_ccoomv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex8*)x,(MKL_Complex8*)betap,(MKL_Complex8*)y);
154 	else
155 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
156 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
157 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
158 	mkl_zcoomv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex16*)x,(MKL_Complex16*)betap,(MKL_Complex16*)y);
159 	else
160 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
161 	return RSB_ERR_UNSUPPORTED_TYPE	;
162 	return RSB_ERR_NO_ERROR;
163 }
164 
rsb__mkl_coo_spmm(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nrhs,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * b,const MKL_INT ldb,void * c,const MKL_INT ldc,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)165 rsb_err_t rsb__mkl_coo_spmm(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nrhs, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * b, const MKL_INT ldb, void * c, const MKL_INT ldc, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags)
166 {
167 	char transA_mkl = rsb_rsb_to_mkl_trans(transA);
168 	char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
169 	matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
170 	matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
171 	matdescra[2] = 'n'; // not unit diagonal
172 	matdescra[3] = 'c'; // zero based indexing
173 
174 #ifdef RSB_NUMERICAL_TYPE_FLOAT
175 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
176 	mkl_scoomm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_INT*)(&k),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(float*)b,(MKL_INT*)(&ldb),(float*)betap,(float*)c,(MKL_INT*)(&ldc));
177 	else
178 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
179 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
180 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
181 	mkl_dcoomm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_INT*)(&k),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(double*)b,(MKL_INT*)(&ldb),(double*)betap,(double*)c,(MKL_INT*)(&ldc));
182 	else
183 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
184 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
185 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
186 	mkl_ccoomm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_INT*)(&k),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex8*)b,(MKL_INT*)(&ldb),(MKL_Complex8*)betap,(MKL_Complex8*)c,(MKL_INT*)(&ldc));
187 	else
188 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
189 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
190 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
191 	mkl_zcoomm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_INT*)(&k),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex16*)b,(MKL_INT*)(&ldb),(MKL_Complex16*)betap,(MKL_Complex16*)c,(MKL_INT*)(&ldc));
192 	else
193 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
194 	return RSB_ERR_UNSUPPORTED_TYPE	;
195 	return RSB_ERR_NO_ERROR;
196 }
197 
rsb__mkl_coo_spsv(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)198 rsb_err_t rsb__mkl_coo_spsv(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags)
199 {
200 	char transA_mkl = rsb_rsb_to_mkl_trans(transA);
201 	char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
202 	matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
203 	matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
204 	matdescra[2] = 'n'; // not unit diagonal
205 	matdescra[3] = 'c'; // zero based indexing
206 	/* 20101118	MKL 9.1 reference manual declares also k among the parameters */
207 #ifdef RSB_NUMERICAL_TYPE_FLOAT
208 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
209 	mkl_scoosv(&transA_mkl,(MKL_INT*)(&m),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(float*)x,(float*)y);
210 	else
211 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
212 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
213 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
214 	mkl_dcoosv(&transA_mkl,(MKL_INT*)(&m),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(double*)x,(double*)y);
215 	else
216 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
217 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
218 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
219 	mkl_ccoosv(&transA_mkl,(MKL_INT*)(&m),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex8*)x,(MKL_Complex8*)y);
220 	else
221 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
222 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
223 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
224 	mkl_zcoosv(&transA_mkl,(MKL_INT*)(&m),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex16*)x,(MKL_Complex16*)y);
225 	else
226 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
227 	return RSB_ERR_UNSUPPORTED_TYPE	;
228 	return RSB_ERR_NO_ERROR;
229 }
230 
rsb__do_mkl_csr_spmv(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)231 static rsb_err_t rsb__do_mkl_csr_spmv(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags){
232 	char transA_mkl = rsb_rsb_to_mkl_trans(transA);
233 	char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
234 	matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
235 	matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
236 	matdescra[2] = 'n'; // not unit diagonal
237 	matdescra[3] = 'c'; // zero based indexing
238 
239 #ifdef RSB_NUMERICAL_TYPE_FLOAT
240 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
241 	mkl_scsrmv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(float*)x,(float*)betap,(float*)y);
242 	else
243 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
244 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
245 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
246 	mkl_dcsrmv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(double*)x,(double*)betap,(double*)y);
247 	else
248 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
249 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
250 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
251 	mkl_ccsrmv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex8*)x,(MKL_Complex8*)betap,(MKL_Complex8*)y);
252 	else
253 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
254 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
255 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
256 	mkl_zcsrmv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex16*)x,(MKL_Complex16*)betap,(MKL_Complex16*)y);
257 	else
258 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
259 	return RSB_ERR_UNSUPPORTED_TYPE	;
260 	return RSB_ERR_NO_ERROR;
261 }
262 
263 /* The following three look weird, I know. */
264 #define RSB_GET_MKL_MAX_THREADS rsb__set_num_threads(RSB_THREADS_GET_MAX_SYS)
265 #define RSB_GET_MKL_BASE_THREADS 1 /* FIXME: no mkl_get_num_threads */
266 #define RSB_GET_MKL_DEFAULT_THREADS mkl_get_max_threads() /* omp_get_num_threads(); */
267 #define RSB_MKL_MAX_AT_TIME 1.0 /* FIXME */
268 
269 #define RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp)				\
270 		if(RSB_DT_SAME_THREADS_TNP(otnp))			\
271 			lnt = unt = 0; 					\
272 		else							\
273 		{							\
274 			if(RSB_DT_THREADS_TUNE_TNP(otnp))		\
275 				; /* ok */				\
276 			else						\
277 				if(RSB_DT_SPEC_THREADS_TNP(otnp))	\
278 			lnt = unt = *otnp;				\
279 		}
280 
281 #define RSB_MKL_THREADS_TUNING_ODECLS					\
282 		rsb_time_t tinf = rsb__timer_granularity();		\
283 		rsb_time_t best = RSB_CONST_IMPOSSIBLY_BIG_TIME;	\
284 		rsb_thread_t ont = RSB_GET_MKL_BASE_THREADS;		\
285 		rsb_thread_t nt, lnt = 1, unt = RSB_GET_MKL_MAX_THREADS;\
286 		rsb_thread_t otn = ont;					\
287 		rsb_thread_t dtn = RSB_GET_MKL_DEFAULT_THREADS;
288 
289 
290 #define RSB_MKL_THREADS_TUNING_IDECLS									\
291 			rsb_time_t it = rsb_time(), ct = RSB_TIME_ZERO;	/* initial/current time */	\
292 			rsb_time_t dt = it, tt = RSB_TIME_ZERO; /* elapsed (delta) / total  time */	\
293 			rsb_time_t bt = RSB_CONST_IMPOSSIBLY_BIG_TIME, wt = RSB_TIME_ZERO; /* best / worst  time */	\
294 			rsb_time_t ss = RSB_TIME_ZERO; /* sum of squares */				\
295 			rsb_time_t mint = RSB_TIME_ZERO; /* minimal time */				\
296 			rsb_int_t times = 0;									\
297 			const mintimes = RSB_CONST_AT_OP_SAMPLES_MIN, maxtimes = RSB_CONST_AT_OP_SAMPLES_MAX;   \
298 			rsb_time_t maxt = RSB_AT_MAX_TIME/* RSB_MKL_MAX_AT_TIME*/;
299 
rsb__mkl_csr_spmv_bench(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,rsb_thread_t * otnp,rsb_time_t * tpop,struct rsb_tattr_t * ttrp,struct rsb_ts_t * tstp)300 rsb_err_t rsb__mkl_csr_spmv_bench(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, rsb_thread_t *otnp, rsb_time_t *tpop, struct rsb_tattr_t* ttrp, struct rsb_ts_t*tstp)
301 {
302 	rsb_err_t errval = RSB_ERR_NO_ERROR;
303 	rsb_time_t dt, tpo;
304 
305 	if(otnp)
306 	{
307 		RSB_MKL_THREADS_TUNING_ODECLS
308 		RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp)
309 
310 		for(nt=lnt;nt<=unt;++nt)
311 		{
312 			RSB_MKL_THREADS_TUNING_IDECLS
313 			if(nt) mkl_set_num_threads(nt);
314 
315 			do
316 			{
317 				errval = rsb__do_mkl_csr_spmv(VA, m, k, nnz, IA, JA, x, y, alphap, betap, transA, typecode, flags);
318 				RSB_SAMPLE_STAT(it,ct,dt,tt,bt,wt,ss,tinf,times);
319 			}
320 			while(RSB_REPEAT(ct-it,times,mint,mintimes,maxt,maxtimes));
321 
322 			dt = bt;
323 			if(dt < best )
324 			{
325 				otn = nt;
326 				best = RSB_MIN_ABOVE_INF(best,dt,tinf);
327 				RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp);
328 			}
329 			rsb__tattr_sets(ttrp,dtn,nt,dt,otn,times);/* FIXME: if no threads tuning, shall set dtpo = btpo, as well as ttrp.optt=0 */
330 			if(dtn == nt) RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp+1);
331 		}
332 		mkl_set_num_threads(ont);
333 done:
334 		ttrp->ttt += rsb_time(); /* ttrp->ttt = tt; */
335 		tpo = best; /* tpo = 1.0 / best; */
336 		*otnp = otn;
337 	}
338 	else
339 	{
340 		dt = -rsb_time();
341 		errval = rsb__do_mkl_csr_spmv(VA, m, k, nnz, IA, JA, x, y, alphap, betap, transA, typecode, flags);
342 		dt += rsb_time();
343 		/* tpo = 1.0 / dt; */
344 		tpo = dt;
345 	}
346 	if(tpop)
347 		*tpop = tpo;
348 	return errval;
349 }
350 
rsb__do_mkl_csr_spmm(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT n,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * b,const MKL_INT ldb,void * c,const MKL_INT ldc,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)351 static rsb_err_t rsb__do_mkl_csr_spmm(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT n, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * b, const MKL_INT ldb, void * c, const MKL_INT ldc, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags){
352 	char transA_mkl = rsb_rsb_to_mkl_trans(transA);
353 	char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
354 	matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
355 	matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
356 	matdescra[2] = 'n'; // not unit diagonal
357 	matdescra[3] = 'c'; // zero based indexing
358 	MKL_INT ldb_ = n, ldc_ = n; /* for zero based indexing */
359 
360 	if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
361 		ldb_ = k, ldc_ = m, /* for one based indexing */
362 		matdescra[3] = 'f'; // one based indexing
363 
364 	#if 1
365 	/* n = nrhs */
366 #ifdef RSB_NUMERICAL_TYPE_FLOAT
367 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
368 	mkl_scsrmm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&n),(MKL_INT*)(&k),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(float*)b,(MKL_INT*)(&ldb_),(float*)betap,(float*)c,(MKL_INT*)(&ldc_));
369 	else
370 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
371 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
372 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
373 	mkl_dcsrmm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&n),(MKL_INT*)(&k),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(double*)b,(MKL_INT*)(&ldb_),(double*)betap,(double*)c,(MKL_INT*)(&ldc_));
374 	else
375 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
376 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
377 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
378 	mkl_ccsrmm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&n),(MKL_INT*)(&k),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex8*)b,(MKL_INT*)(&ldb_),(MKL_Complex8*)betap,(MKL_Complex8*)c,(MKL_INT*)(&ldc_));
379 	else
380 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
381 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
382 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
383 	mkl_zcsrmm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&n),(MKL_INT*)(&k),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex16*)b,(MKL_INT*)(&ldb_),(MKL_Complex16*)betap,(MKL_Complex16*)c,(MKL_INT*)(&ldc_));
384 	else
385 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
386 	return RSB_ERR_UNSUPPORTED_TYPE	;
387 #endif /* 1 */
388 	return RSB_ERR_NO_ERROR;
389 }
390 
rsb__mkl_csr_spmm_bench(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT n,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * b,const MKL_INT ldb,void * c,const MKL_INT ldc,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,rsb_thread_t * otnp,rsb_time_t * tpop,struct rsb_tattr_t * ttrp,struct rsb_ts_t * tstp)391 rsb_err_t rsb__mkl_csr_spmm_bench(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT n, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * b, const MKL_INT ldb, void * c, const MKL_INT ldc, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, rsb_thread_t *otnp, rsb_time_t *tpop, struct rsb_tattr_t* ttrp, struct rsb_ts_t*tstp)
392 {
393 	rsb_err_t errval = RSB_ERR_NO_ERROR;
394 	rsb_time_t dt, tpo;
395 
396 	if(otnp)
397 	{
398 		RSB_MKL_THREADS_TUNING_ODECLS
399 		RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp)
400 
401 		for(nt=lnt;nt<=unt;++nt)
402 		{
403 			RSB_MKL_THREADS_TUNING_IDECLS
404 			if(nt) mkl_set_num_threads(nt);
405 
406 			do
407 			{
408 				errval = rsb__do_mkl_csr_spmm(VA, m, k, n, nnz, IA, JA, b, ldb, c, ldc, alphap, betap, transA, typecode, flags);
409 				RSB_SAMPLE_STAT(it,ct,dt,tt,bt,wt,ss,tinf,times);
410 			}
411 			while(RSB_REPEAT(ct-it,times,mint,mintimes,maxt,maxtimes));
412 
413 			dt = bt;
414 			if(dt < best )
415 			{
416 				otn = nt;
417 				best = RSB_MIN_ABOVE_INF(best,dt,tinf);
418 				RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp);
419 			}
420 			rsb__tattr_sets(ttrp,dtn,nt,dt,otn,times);/* FIXME: if no threads tuning, shall set dtpo = btpo, as well as ttrp.optt=0 */
421 			if(dtn == nt) RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp+1);
422 		}
423 		mkl_set_num_threads(ont);
424 done:
425 		ttrp->ttt += rsb_time(); /* ttrp->ttt = tt; */
426 		tpo = best; /* tpo = 1.0 / best; */
427 		*otnp = otn;
428 	}
429 	else
430 	{
431 		dt = -rsb_time();
432 		errval = rsb__do_mkl_csr_spmm(VA, m, k, n, nnz, IA, JA, b, ldb, c, ldc, alphap, betap, transA, typecode, flags);
433 		dt += rsb_time();
434 		/* tpo = 1.0 / dt; */
435 		tpo = dt;
436 	}
437 	if(tpop)
438 		*tpop = tpo;
439 	return errval;
440 }
441 
rsb__do_mkl_csr_spsv(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)442 rsb_err_t rsb__do_mkl_csr_spsv(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags)
443 {
444 	char transA_mkl = rsb_rsb_to_mkl_trans(transA);
445 	char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
446 	matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
447 	matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
448 	matdescra[2] = 'n'; // not unit diagonal
449 	matdescra[3] = 'c'; // zero based indexing
450 
451 #ifdef RSB_NUMERICAL_TYPE_FLOAT
452 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
453 	mkl_scsrsv(&transA_mkl,(MKL_INT*)(&m),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(float*)x,(float*)y);
454 	else
455 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
456 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
457 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
458 	mkl_dcsrsv(&transA_mkl,(MKL_INT*)(&m),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(double*)x,(double*)y);
459 	else
460 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
461 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
462 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
463 	mkl_ccsrsv(&transA_mkl,(MKL_INT*)(&m),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex8*)x,(MKL_Complex8*)y);
464 	else
465 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
466 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
467 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
468 	mkl_zcsrsv(&transA_mkl,(MKL_INT*)(&m),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex16*)x,(MKL_Complex16*)y);
469 	else
470 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
471 	return RSB_ERR_UNSUPPORTED_TYPE	;
472 	return RSB_ERR_NO_ERROR;
473 }
474 
rsb__do_mkl_csr_spsm(const void * VA,const MKL_INT m,const MKL_INT nrhs,const MKL_INT * IA,const MKL_INT * JA,const void * b,void * c,const void * alphap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,const MKL_INT ldb,const MKL_INT ldc)475 rsb_err_t rsb__do_mkl_csr_spsm(const void *VA, const MKL_INT m, const MKL_INT nrhs, const MKL_INT * IA, const MKL_INT *JA, const void * b, void * c, const void *alphap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, const MKL_INT ldb, const MKL_INT ldc)
476 {
477 	char transA_mkl = rsb_rsb_to_mkl_trans(transA);
478 	char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
479 	matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
480 	matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
481 	matdescra[2] = 'n'; // not unit diagonal
482 	matdescra[3] = 'c'; // zero based indexing
483 
484 #ifdef RSB_NUMERICAL_TYPE_FLOAT
485 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
486 	mkl_scsrsm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(float*)b,(MKL_INT*)&ldb,(float*)c,(MKL_INT*)&ldc);
487 	else
488 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
489 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
490 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
491 	mkl_dcsrsm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(double*)b,(MKL_INT*)&ldb,(double*)c,(MKL_INT*)&ldc);
492 	else
493 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
494 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
495 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
496 	mkl_ccsrsm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex8*)b,(MKL_INT*)&ldb,(MKL_Complex8*)c,(MKL_INT*)&ldc);
497 	else
498 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
499 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
500 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
501 	mkl_zcsrsm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex16*)b,(MKL_INT*)&ldb,(MKL_Complex16*)c,(MKL_INT*)&ldc);
502 	else
503 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
504 	return RSB_ERR_UNSUPPORTED_TYPE	;
505 	return RSB_ERR_NO_ERROR;
506 }
507 
rsb__mkl_csr_spsv_bench(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * b,void * c,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,rsb_thread_t * otnp,rsb_time_t * tpop,struct rsb_tattr_t * ttrp,struct rsb_ts_t * tstp)508 rsb_err_t rsb__mkl_csr_spsv_bench(const void *VA, const MKL_INT m, const MKL_INT k/*, const MKL_INT n*/, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * b, /*const MKL_INT ldb,*/ void * c,/* const MKL_INT ldc,*/ const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, rsb_thread_t *otnp, rsb_time_t *tpop, struct rsb_tattr_t* ttrp, struct rsb_ts_t*tstp)
509 {
510 	rsb_err_t errval = RSB_ERR_NO_ERROR;
511 	rsb_time_t dt, tpo;
512 
513 	if(otnp)
514 	{
515 		RSB_MKL_THREADS_TUNING_ODECLS
516 		RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp)
517 
518 		for(nt=lnt;nt<=unt;++nt)
519 		{
520 			RSB_MKL_THREADS_TUNING_IDECLS
521 			if(nt) mkl_set_num_threads(nt);
522 
523 			do
524 			{
525 				errval = rsb__do_mkl_csr_spsv(VA, m, k, nnz, IA, JA, b, c, alphap, betap, transA, typecode, flags);
526 				RSB_SAMPLE_STAT(it,ct,dt,tt,bt,wt,ss,tinf,times);
527 			}
528 			while(RSB_REPEAT(ct-it,times,mint,mintimes,maxt,maxtimes));
529 
530 			dt = bt;
531 			if(dt < best )
532 			{
533 				otn = nt;
534 				best = RSB_MIN_ABOVE_INF(best,dt,tinf);
535 				RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp);
536 			}
537 			rsb__tattr_sets(ttrp,dtn,nt,dt,otn,times);/* FIXME: if no threads tuning, shall set dtpo = btpo, as well as ttrp.optt=0 */
538 			if(dtn == nt) RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp+1);
539 		}
540 		mkl_set_num_threads(ont);
541 done:
542 		ttrp->ttt += rsb_time(); /* ttrp->ttt = tt; */
543 		tpo = best; /* tpo = 1.0 / best; */
544 		*otnp = otn;
545 	}
546 	else
547 	{
548 		dt = -rsb_time();
549 		errval = rsb__do_mkl_csr_spsv(VA, m, k, nnz, IA, JA, b, c, alphap, betap, transA, typecode, flags);
550 		dt += rsb_time();
551 		/* tpo = 1.0 / dt; */
552 		tpo = dt;
553 	}
554 	if(tpop)
555 		*tpop = tpo;
556 	return errval;
557 }
558 
rsb__mkl_csr_spsm_bench(const void * VA,const MKL_INT m,const MKL_INT nrhs,const MKL_INT * IA,const MKL_INT * JA,const void * b,void * c,const void * alphap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,const MKL_INT ldb,const MKL_INT ldc,rsb_thread_t * otnp,rsb_time_t * tpop,struct rsb_tattr_t * ttrp,struct rsb_ts_t * tstp)559 rsb_err_t rsb__mkl_csr_spsm_bench(const void *VA, const MKL_INT m, const MKL_INT nrhs, const MKL_INT * IA, const MKL_INT *JA, const void * b, void * c, const void *alphap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, const MKL_INT ldb, const MKL_INT ldc, rsb_thread_t *otnp, rsb_time_t *tpop, struct rsb_tattr_t* ttrp, struct rsb_ts_t*tstp)
560 {
561 	rsb_err_t errval = RSB_ERR_NO_ERROR;
562 	rsb_time_t dt, tpo;
563 
564 	if(otnp)
565 	{
566 		RSB_MKL_THREADS_TUNING_ODECLS
567 		RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp)
568 
569 		for(nt=lnt;nt<=unt;++nt)
570 		{
571 			RSB_MKL_THREADS_TUNING_IDECLS
572 			if(nt) mkl_set_num_threads(nt);
573 
574 			do
575 			{
576 				errval = rsb__do_mkl_csr_spsm(VA, m, nrhs, IA, JA, b, c, alphap, transA, typecode, flags, ldb, ldc);
577 				RSB_SAMPLE_STAT(it,ct,dt,tt,bt,wt,ss,tinf,times);
578 			}
579 			while(RSB_REPEAT(ct-it,times,mint,mintimes,maxt,maxtimes));
580 
581 			dt = bt;
582 			if(dt < best )
583 			{
584 				otn = nt;
585 				best = RSB_MIN_ABOVE_INF(best,dt,tinf);
586 				RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp);
587 			}
588 			rsb__tattr_sets(ttrp,dtn,nt,dt,otn,times);/* FIXME: if no threads tuning, shall set dtpo = btpo, as well as ttrp.optt=0 */
589 			if(dtn == nt) RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp+1);
590 		}
591 		mkl_set_num_threads(ont);
592 done:
593 		ttrp->ttt += rsb_time(); /* ttrp->ttt = tt; */
594 		tpo = best; /* tpo = 1.0 / best; */
595 		*otnp = otn;
596 	}
597 	else
598 	{
599 		dt = -rsb_time();
600 		errval = rsb__do_mkl_csr_spsm(VA, m, nrhs, IA, JA, b, c, alphap, transA, typecode, flags, ldb, ldc);
601 		dt += rsb_time();
602 		/* tpo = 1.0 / dt; */
603 		tpo = dt;
604 	}
605 	if(tpop)
606 		*tpop = tpo;
607 	return errval;
608 }
609 
rsb__mkl_coo2csr(const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const void * IVA,const MKL_INT * IIA,const MKL_INT * IJA,const void * OVA,const MKL_INT * OIA,const MKL_INT * OJA,rsb_type_t typecode,const MKL_INT mib)610 rsb_err_t rsb__mkl_coo2csr(const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const void *IVA, const MKL_INT * IIA, const MKL_INT *IJA, const void *OVA, const MKL_INT * OIA, const MKL_INT *OJA, rsb_type_t typecode, const MKL_INT mib)
611 {
612 	int info;
613 	int job[6];
614 	job[0] = 1; // coo2csr (=1, the matrix in the coordinate format is converted to the CSR;=2, the matrix in the coordinate format is converted to the CSR format, and the column indices in CSR representation are sorted in the increasing order within each row.)
615 	job[1] = mib; // 0 based csr
616 	job[2] = 0; // 0 based coo
617 	job[3] = 0; // ignored
618 	job[4] = nnz; // ignored here
619 	job[5] = 0; // fill all three arrays
620 
621 #ifdef RSB_NUMERICAL_TYPE_FLOAT
622 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT  )
623 	mkl_scsrcoo(job,(MKL_INT*)(&m),(float*)OVA,(MKL_INT*)OJA,(MKL_INT*)OIA,(MKL_INT*)(&nnz),(float*)(IVA),(MKL_INT*)IIA,(MKL_INT*)IJA,&info);
624 	else
625 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
626 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
627 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE  )
628 	mkl_dcsrcoo(job,(MKL_INT*)(&m),(double*)OVA,(MKL_INT*)OJA,(MKL_INT*)OIA,(MKL_INT*)(&nnz),(double*)(IVA),(MKL_INT*)IIA,(MKL_INT*)IJA,&info);
629 	else
630 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
631 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
632 	if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX  )
633 	mkl_ccsrcoo(job,(MKL_INT*)(&m),(MKL_Complex8*)OVA,(MKL_INT*)OJA,(MKL_INT*)OIA,(MKL_INT*)(&nnz),(MKL_Complex8*)(IVA),(MKL_INT*)IIA,(MKL_INT*)IJA,&info);
634 	else
635 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
636 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
637 	if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX  )
638 	mkl_zcsrcoo(job,(MKL_INT*)(&m),(MKL_Complex16*)OVA,(MKL_INT*)OJA,(MKL_INT*)OIA,(MKL_INT*)(&nnz),(MKL_Complex16*)(IVA),(MKL_INT*)IIA,(MKL_INT*)IJA,&info);
639 	else
640 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
641 	return RSB_ERR_UNSUPPORTED_TYPE	;
642 	return RSB_ERR_NO_ERROR;
643 }
644 
645 #endif /* RSB_WANT_MKL */
646 /* @endcond */
647 
648