1 /* @cond INNERDOC */
2 /*!
3 * @file
4 * @author Michele Martone
5 * @brief
6 * This source file contains some MKL interfacing functions.
7 * */
8
9
10 /*
11
12 Copyright (C) 2008-2020 Michele Martone
13
14 This file is part of librsb.
15
16 librsb is free software; you can redistribute it and/or modify it
17 under the terms of the GNU Lesser General Public License as published
18 by the Free Software Foundation; either version 3 of the License, or
19 (at your option) any later version.
20
21 librsb is distributed in the hope that it will be useful, but WITHOUT
22 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
24 License for more details.
25
26 You should have received a copy of the GNU Lesser General Public
27 License along with librsb; see the file COPYING.
28 If not, see <http://www.gnu.org/licenses/>.
29
30 */
31 /*
32 The code in this file was generated automatically by an M4 script.
33 It is not meant to be used as an API (Application Programming Interface).
34 p.s.: right now, only row major matrix access is considered.
35
36 */
37 #include "rsb_mkl.h"
38 #if RSB_WANT_MKL
39
40
41
rsb__mkl_gemv(rsb_type_t typecode,const void * Mp,const void * Bp,void * Xp,rsb_nnz_idx_t mdim,rsb_coo_idx_t vdim,rsb_coo_idx_t * udimp)42 rsb_err_t rsb__mkl_gemv(rsb_type_t typecode, const void * Mp, const void*Bp, void*Xp, rsb_nnz_idx_t mdim, rsb_coo_idx_t vdim, rsb_coo_idx_t*udimp)
43 {
44 /* FIXME: TODO: incX != 1 */
45 rsb_err_t errval = RSB_ERR_NO_ERROR;
46 const MKL_INT dim=(rsb_coo_idx_t)sqrt((double)mdim);
47 const MKL_INT incX=1;
48 char transA_mkl=110;
49 if(!Mp || !Xp || !Bp)
50 goto err;
51 if(dim<1 || dim>vdim)
52 goto err;
53 #ifdef RSB_NUMERICAL_TYPE_FLOAT
54 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
55 {
56 const float alpha=((float)(1.0)), beta=((float)(1.0));
57 sgemv(&transA_mkl,&dim,&dim, (float*)(&alpha),(const float*)Mp,&dim,(const float*)Bp,&incX,(float*)&beta,(float*)Xp,&incX);
58 }
59 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
60 else
61 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
62 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
63 {
64 const double alpha=((double)(1.0)), beta=((double)(1.0));
65 dgemv(&transA_mkl,&dim,&dim, (double*)(&alpha),(const double*)Mp,&dim,(const double*)Bp,&incX,(double*)&beta,(double*)Xp,&incX);
66 }
67 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
68 else
69 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
70 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
71 {
72 const float complex alpha=((float complex)(1.0)), beta=((float complex)(1.0));
73 cgemv(&transA_mkl,&dim,&dim, (MKL_Complex8*)(&alpha),(const MKL_Complex8*)Mp,&dim,(const MKL_Complex8*)Bp,&incX,(MKL_Complex8*)&beta,(MKL_Complex8*)Xp,&incX);
74 }
75 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
76 else
77 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
78 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
79 {
80 const double complex alpha=((double complex)(1.0)), beta=((double complex)(1.0));
81 zgemv(&transA_mkl,&dim,&dim, (MKL_Complex16*)(&alpha),(const MKL_Complex16*)Mp,&dim,(const MKL_Complex16*)Bp,&incX,(MKL_Complex16*)&beta,(MKL_Complex16*)Xp,&incX);
82 }
83 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
84 else
85 errval=RSB_ERR_BADARGS;
86
87 if(udimp)
88 *udimp=dim;
89 err:
90 return errval;
91 }
92
rsb_rsb_to_mkl_trans(rsb_trans_t transA_mkl)93 static char rsb_rsb_to_mkl_trans(rsb_trans_t transA_mkl)
94 {
95 /**
96 * \ingroup gr_internals
97 */
98 switch(transA_mkl)
99 {
100 case(RSB_TRANSPOSITION_N):
101 return 'n';
102 break;
103 case(RSB_TRANSPOSITION_T):
104 return 't';
105 break;
106 case(RSB_TRANSPOSITION_C):
107 return 'c';
108 break;
109 default:
110 return 'n'; // FIXME
111 }
112 }
113
rsb_rsb_to_mkl_sym(rsb_flags_t flags)114 static char rsb_rsb_to_mkl_sym(rsb_flags_t flags)
115 {
116 if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_SYMMETRIC))
117 return 's';
118 if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_TRIANGULAR))
119 return 't';
120 else
121 return 'g';
122 }
123
rsb_rsb_to_mkl_upl(rsb_flags_t flags)124 static char rsb_rsb_to_mkl_upl(rsb_flags_t flags)
125 {
126 if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_LOWER))
127 return 'l';
128 else
129 return 'u';
130 }
131
rsb__mkl_coo_spmv(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)132 rsb_err_t rsb__mkl_coo_spmv(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags)
133 {
134 char transA_mkl = rsb_rsb_to_mkl_trans(transA);
135 char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
136 matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
137 matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
138 matdescra[2] = 'n'; // not unit diagonal
139 matdescra[3] = 'c'; // zero based indexing
140
141 #ifdef RSB_NUMERICAL_TYPE_FLOAT
142 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
143 mkl_scoomv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(float*)x,(float*)betap,(float*)y);
144 else
145 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
146 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
147 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
148 mkl_dcoomv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(double*)x,(double*)betap,(double*)y);
149 else
150 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
151 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
152 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
153 mkl_ccoomv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex8*)x,(MKL_Complex8*)betap,(MKL_Complex8*)y);
154 else
155 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
156 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
157 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
158 mkl_zcoomv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex16*)x,(MKL_Complex16*)betap,(MKL_Complex16*)y);
159 else
160 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
161 return RSB_ERR_UNSUPPORTED_TYPE ;
162 return RSB_ERR_NO_ERROR;
163 }
164
rsb__mkl_coo_spmm(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nrhs,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * b,const MKL_INT ldb,void * c,const MKL_INT ldc,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)165 rsb_err_t rsb__mkl_coo_spmm(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nrhs, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * b, const MKL_INT ldb, void * c, const MKL_INT ldc, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags)
166 {
167 char transA_mkl = rsb_rsb_to_mkl_trans(transA);
168 char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
169 matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
170 matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
171 matdescra[2] = 'n'; // not unit diagonal
172 matdescra[3] = 'c'; // zero based indexing
173
174 #ifdef RSB_NUMERICAL_TYPE_FLOAT
175 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
176 mkl_scoomm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_INT*)(&k),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(float*)b,(MKL_INT*)(&ldb),(float*)betap,(float*)c,(MKL_INT*)(&ldc));
177 else
178 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
179 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
180 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
181 mkl_dcoomm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_INT*)(&k),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(double*)b,(MKL_INT*)(&ldb),(double*)betap,(double*)c,(MKL_INT*)(&ldc));
182 else
183 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
184 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
185 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
186 mkl_ccoomm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_INT*)(&k),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex8*)b,(MKL_INT*)(&ldb),(MKL_Complex8*)betap,(MKL_Complex8*)c,(MKL_INT*)(&ldc));
187 else
188 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
189 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
190 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
191 mkl_zcoomm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_INT*)(&k),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex16*)b,(MKL_INT*)(&ldb),(MKL_Complex16*)betap,(MKL_Complex16*)c,(MKL_INT*)(&ldc));
192 else
193 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
194 return RSB_ERR_UNSUPPORTED_TYPE ;
195 return RSB_ERR_NO_ERROR;
196 }
197
rsb__mkl_coo_spsv(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)198 rsb_err_t rsb__mkl_coo_spsv(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags)
199 {
200 char transA_mkl = rsb_rsb_to_mkl_trans(transA);
201 char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
202 matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
203 matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
204 matdescra[2] = 'n'; // not unit diagonal
205 matdescra[3] = 'c'; // zero based indexing
206 /* 20101118 MKL 9.1 reference manual declares also k among the parameters */
207 #ifdef RSB_NUMERICAL_TYPE_FLOAT
208 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
209 mkl_scoosv(&transA_mkl,(MKL_INT*)(&m),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(float*)x,(float*)y);
210 else
211 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
212 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
213 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
214 mkl_dcoosv(&transA_mkl,(MKL_INT*)(&m),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(double*)x,(double*)y);
215 else
216 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
217 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
218 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
219 mkl_ccoosv(&transA_mkl,(MKL_INT*)(&m),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex8*)x,(MKL_Complex8*)y);
220 else
221 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
222 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
223 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
224 mkl_zcoosv(&transA_mkl,(MKL_INT*)(&m),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)IA,(MKL_INT*)JA,(MKL_INT*)(&nnz),(MKL_Complex16*)x,(MKL_Complex16*)y);
225 else
226 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
227 return RSB_ERR_UNSUPPORTED_TYPE ;
228 return RSB_ERR_NO_ERROR;
229 }
230
rsb__do_mkl_csr_spmv(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)231 static rsb_err_t rsb__do_mkl_csr_spmv(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags){
232 char transA_mkl = rsb_rsb_to_mkl_trans(transA);
233 char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
234 matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
235 matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
236 matdescra[2] = 'n'; // not unit diagonal
237 matdescra[3] = 'c'; // zero based indexing
238
239 #ifdef RSB_NUMERICAL_TYPE_FLOAT
240 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
241 mkl_scsrmv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(float*)x,(float*)betap,(float*)y);
242 else
243 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
244 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
245 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
246 mkl_dcsrmv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(double*)x,(double*)betap,(double*)y);
247 else
248 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
249 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
250 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
251 mkl_ccsrmv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex8*)x,(MKL_Complex8*)betap,(MKL_Complex8*)y);
252 else
253 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
254 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
255 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
256 mkl_zcsrmv(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&k),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex16*)x,(MKL_Complex16*)betap,(MKL_Complex16*)y);
257 else
258 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
259 return RSB_ERR_UNSUPPORTED_TYPE ;
260 return RSB_ERR_NO_ERROR;
261 }
262
263 /* The following three look weird, I know. */
264 #define RSB_GET_MKL_MAX_THREADS rsb__set_num_threads(RSB_THREADS_GET_MAX_SYS)
265 #define RSB_GET_MKL_BASE_THREADS 1 /* FIXME: no mkl_get_num_threads */
266 #define RSB_GET_MKL_DEFAULT_THREADS mkl_get_max_threads() /* omp_get_num_threads(); */
267 #define RSB_MKL_MAX_AT_TIME 1.0 /* FIXME */
268
269 #define RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp) \
270 if(RSB_DT_SAME_THREADS_TNP(otnp)) \
271 lnt = unt = 0; \
272 else \
273 { \
274 if(RSB_DT_THREADS_TUNE_TNP(otnp)) \
275 ; /* ok */ \
276 else \
277 if(RSB_DT_SPEC_THREADS_TNP(otnp)) \
278 lnt = unt = *otnp; \
279 }
280
281 #define RSB_MKL_THREADS_TUNING_ODECLS \
282 rsb_time_t tinf = rsb__timer_granularity(); \
283 rsb_time_t best = RSB_CONST_IMPOSSIBLY_BIG_TIME; \
284 rsb_thread_t ont = RSB_GET_MKL_BASE_THREADS; \
285 rsb_thread_t nt, lnt = 1, unt = RSB_GET_MKL_MAX_THREADS;\
286 rsb_thread_t otn = ont; \
287 rsb_thread_t dtn = RSB_GET_MKL_DEFAULT_THREADS;
288
289
290 #define RSB_MKL_THREADS_TUNING_IDECLS \
291 rsb_time_t it = rsb_time(), ct = RSB_TIME_ZERO; /* initial/current time */ \
292 rsb_time_t dt = it, tt = RSB_TIME_ZERO; /* elapsed (delta) / total time */ \
293 rsb_time_t bt = RSB_CONST_IMPOSSIBLY_BIG_TIME, wt = RSB_TIME_ZERO; /* best / worst time */ \
294 rsb_time_t ss = RSB_TIME_ZERO; /* sum of squares */ \
295 rsb_time_t mint = RSB_TIME_ZERO; /* minimal time */ \
296 rsb_int_t times = 0; \
297 const mintimes = RSB_CONST_AT_OP_SAMPLES_MIN, maxtimes = RSB_CONST_AT_OP_SAMPLES_MAX; \
298 rsb_time_t maxt = RSB_AT_MAX_TIME/* RSB_MKL_MAX_AT_TIME*/;
299
rsb__mkl_csr_spmv_bench(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,rsb_thread_t * otnp,rsb_time_t * tpop,struct rsb_tattr_t * ttrp,struct rsb_ts_t * tstp)300 rsb_err_t rsb__mkl_csr_spmv_bench(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, rsb_thread_t *otnp, rsb_time_t *tpop, struct rsb_tattr_t* ttrp, struct rsb_ts_t*tstp)
301 {
302 rsb_err_t errval = RSB_ERR_NO_ERROR;
303 rsb_time_t dt, tpo;
304
305 if(otnp)
306 {
307 RSB_MKL_THREADS_TUNING_ODECLS
308 RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp)
309
310 for(nt=lnt;nt<=unt;++nt)
311 {
312 RSB_MKL_THREADS_TUNING_IDECLS
313 if(nt) mkl_set_num_threads(nt);
314
315 do
316 {
317 errval = rsb__do_mkl_csr_spmv(VA, m, k, nnz, IA, JA, x, y, alphap, betap, transA, typecode, flags);
318 RSB_SAMPLE_STAT(it,ct,dt,tt,bt,wt,ss,tinf,times);
319 }
320 while(RSB_REPEAT(ct-it,times,mint,mintimes,maxt,maxtimes));
321
322 dt = bt;
323 if(dt < best )
324 {
325 otn = nt;
326 best = RSB_MIN_ABOVE_INF(best,dt,tinf);
327 RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp);
328 }
329 rsb__tattr_sets(ttrp,dtn,nt,dt,otn,times);/* FIXME: if no threads tuning, shall set dtpo = btpo, as well as ttrp.optt=0 */
330 if(dtn == nt) RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp+1);
331 }
332 mkl_set_num_threads(ont);
333 done:
334 ttrp->ttt += rsb_time(); /* ttrp->ttt = tt; */
335 tpo = best; /* tpo = 1.0 / best; */
336 *otnp = otn;
337 }
338 else
339 {
340 dt = -rsb_time();
341 errval = rsb__do_mkl_csr_spmv(VA, m, k, nnz, IA, JA, x, y, alphap, betap, transA, typecode, flags);
342 dt += rsb_time();
343 /* tpo = 1.0 / dt; */
344 tpo = dt;
345 }
346 if(tpop)
347 *tpop = tpo;
348 return errval;
349 }
350
rsb__do_mkl_csr_spmm(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT n,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * b,const MKL_INT ldb,void * c,const MKL_INT ldc,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)351 static rsb_err_t rsb__do_mkl_csr_spmm(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT n, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * b, const MKL_INT ldb, void * c, const MKL_INT ldc, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags){
352 char transA_mkl = rsb_rsb_to_mkl_trans(transA);
353 char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
354 matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
355 matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
356 matdescra[2] = 'n'; // not unit diagonal
357 matdescra[3] = 'c'; // zero based indexing
358 MKL_INT ldb_ = n, ldc_ = n; /* for zero based indexing */
359
360 if(RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE))
361 ldb_ = k, ldc_ = m, /* for one based indexing */
362 matdescra[3] = 'f'; // one based indexing
363
364 #if 1
365 /* n = nrhs */
366 #ifdef RSB_NUMERICAL_TYPE_FLOAT
367 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
368 mkl_scsrmm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&n),(MKL_INT*)(&k),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(float*)b,(MKL_INT*)(&ldb_),(float*)betap,(float*)c,(MKL_INT*)(&ldc_));
369 else
370 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
371 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
372 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
373 mkl_dcsrmm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&n),(MKL_INT*)(&k),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(double*)b,(MKL_INT*)(&ldb_),(double*)betap,(double*)c,(MKL_INT*)(&ldc_));
374 else
375 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
376 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
377 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
378 mkl_ccsrmm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&n),(MKL_INT*)(&k),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex8*)b,(MKL_INT*)(&ldb_),(MKL_Complex8*)betap,(MKL_Complex8*)c,(MKL_INT*)(&ldc_));
379 else
380 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
381 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
382 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
383 mkl_zcsrmm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&n),(MKL_INT*)(&k),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex16*)b,(MKL_INT*)(&ldb_),(MKL_Complex16*)betap,(MKL_Complex16*)c,(MKL_INT*)(&ldc_));
384 else
385 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
386 return RSB_ERR_UNSUPPORTED_TYPE ;
387 #endif /* 1 */
388 return RSB_ERR_NO_ERROR;
389 }
390
rsb__mkl_csr_spmm_bench(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT n,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * b,const MKL_INT ldb,void * c,const MKL_INT ldc,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,rsb_thread_t * otnp,rsb_time_t * tpop,struct rsb_tattr_t * ttrp,struct rsb_ts_t * tstp)391 rsb_err_t rsb__mkl_csr_spmm_bench(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT n, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * b, const MKL_INT ldb, void * c, const MKL_INT ldc, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, rsb_thread_t *otnp, rsb_time_t *tpop, struct rsb_tattr_t* ttrp, struct rsb_ts_t*tstp)
392 {
393 rsb_err_t errval = RSB_ERR_NO_ERROR;
394 rsb_time_t dt, tpo;
395
396 if(otnp)
397 {
398 RSB_MKL_THREADS_TUNING_ODECLS
399 RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp)
400
401 for(nt=lnt;nt<=unt;++nt)
402 {
403 RSB_MKL_THREADS_TUNING_IDECLS
404 if(nt) mkl_set_num_threads(nt);
405
406 do
407 {
408 errval = rsb__do_mkl_csr_spmm(VA, m, k, n, nnz, IA, JA, b, ldb, c, ldc, alphap, betap, transA, typecode, flags);
409 RSB_SAMPLE_STAT(it,ct,dt,tt,bt,wt,ss,tinf,times);
410 }
411 while(RSB_REPEAT(ct-it,times,mint,mintimes,maxt,maxtimes));
412
413 dt = bt;
414 if(dt < best )
415 {
416 otn = nt;
417 best = RSB_MIN_ABOVE_INF(best,dt,tinf);
418 RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp);
419 }
420 rsb__tattr_sets(ttrp,dtn,nt,dt,otn,times);/* FIXME: if no threads tuning, shall set dtpo = btpo, as well as ttrp.optt=0 */
421 if(dtn == nt) RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp+1);
422 }
423 mkl_set_num_threads(ont);
424 done:
425 ttrp->ttt += rsb_time(); /* ttrp->ttt = tt; */
426 tpo = best; /* tpo = 1.0 / best; */
427 *otnp = otn;
428 }
429 else
430 {
431 dt = -rsb_time();
432 errval = rsb__do_mkl_csr_spmm(VA, m, k, n, nnz, IA, JA, b, ldb, c, ldc, alphap, betap, transA, typecode, flags);
433 dt += rsb_time();
434 /* tpo = 1.0 / dt; */
435 tpo = dt;
436 }
437 if(tpop)
438 *tpop = tpo;
439 return errval;
440 }
441
rsb__do_mkl_csr_spsv(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * x,void * y,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags)442 rsb_err_t rsb__do_mkl_csr_spsv(const void *VA, const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * x, void * y, const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags)
443 {
444 char transA_mkl = rsb_rsb_to_mkl_trans(transA);
445 char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
446 matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
447 matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
448 matdescra[2] = 'n'; // not unit diagonal
449 matdescra[3] = 'c'; // zero based indexing
450
451 #ifdef RSB_NUMERICAL_TYPE_FLOAT
452 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
453 mkl_scsrsv(&transA_mkl,(MKL_INT*)(&m),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(float*)x,(float*)y);
454 else
455 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
456 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
457 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
458 mkl_dcsrsv(&transA_mkl,(MKL_INT*)(&m),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(double*)x,(double*)y);
459 else
460 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
461 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
462 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
463 mkl_ccsrsv(&transA_mkl,(MKL_INT*)(&m),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex8*)x,(MKL_Complex8*)y);
464 else
465 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
466 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
467 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
468 mkl_zcsrsv(&transA_mkl,(MKL_INT*)(&m),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex16*)x,(MKL_Complex16*)y);
469 else
470 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
471 return RSB_ERR_UNSUPPORTED_TYPE ;
472 return RSB_ERR_NO_ERROR;
473 }
474
rsb__do_mkl_csr_spsm(const void * VA,const MKL_INT m,const MKL_INT nrhs,const MKL_INT * IA,const MKL_INT * JA,const void * b,void * c,const void * alphap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,const MKL_INT ldb,const MKL_INT ldc)475 rsb_err_t rsb__do_mkl_csr_spsm(const void *VA, const MKL_INT m, const MKL_INT nrhs, const MKL_INT * IA, const MKL_INT *JA, const void * b, void * c, const void *alphap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, const MKL_INT ldb, const MKL_INT ldc)
476 {
477 char transA_mkl = rsb_rsb_to_mkl_trans(transA);
478 char matdescra[]={RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL,RSB_NUL};
479 matdescra[0] = rsb_rsb_to_mkl_sym(flags); // general ?
480 matdescra[1] = rsb_rsb_to_mkl_upl(flags); // up or lo ?
481 matdescra[2] = 'n'; // not unit diagonal
482 matdescra[3] = 'c'; // zero based indexing
483
484 #ifdef RSB_NUMERICAL_TYPE_FLOAT
485 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
486 mkl_scsrsm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(float*)alphap,matdescra,(float*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(float*)b,(MKL_INT*)&ldb,(float*)c,(MKL_INT*)&ldc);
487 else
488 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
489 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
490 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
491 mkl_dcsrsm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(double*)alphap,matdescra,(double*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(double*)b,(MKL_INT*)&ldb,(double*)c,(MKL_INT*)&ldc);
492 else
493 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
494 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
495 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
496 mkl_ccsrsm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_Complex8*)alphap,matdescra,(MKL_Complex8*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex8*)b,(MKL_INT*)&ldb,(MKL_Complex8*)c,(MKL_INT*)&ldc);
497 else
498 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
499 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
500 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
501 mkl_zcsrsm(&transA_mkl,(MKL_INT*)(&m),(MKL_INT*)(&nrhs),(MKL_Complex16*)alphap,matdescra,(MKL_Complex16*)VA,(MKL_INT*)JA,(MKL_INT*)IA,(MKL_INT*)(IA+1),(MKL_Complex16*)b,(MKL_INT*)&ldb,(MKL_Complex16*)c,(MKL_INT*)&ldc);
502 else
503 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
504 return RSB_ERR_UNSUPPORTED_TYPE ;
505 return RSB_ERR_NO_ERROR;
506 }
507
rsb__mkl_csr_spsv_bench(const void * VA,const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const MKL_INT * IA,const MKL_INT * JA,const void * b,void * c,const void * alphap,const void * betap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,rsb_thread_t * otnp,rsb_time_t * tpop,struct rsb_tattr_t * ttrp,struct rsb_ts_t * tstp)508 rsb_err_t rsb__mkl_csr_spsv_bench(const void *VA, const MKL_INT m, const MKL_INT k/*, const MKL_INT n*/, const MKL_INT nnz, const MKL_INT * IA, const MKL_INT *JA, const void * b, /*const MKL_INT ldb,*/ void * c,/* const MKL_INT ldc,*/ const void *alphap, const void * betap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, rsb_thread_t *otnp, rsb_time_t *tpop, struct rsb_tattr_t* ttrp, struct rsb_ts_t*tstp)
509 {
510 rsb_err_t errval = RSB_ERR_NO_ERROR;
511 rsb_time_t dt, tpo;
512
513 if(otnp)
514 {
515 RSB_MKL_THREADS_TUNING_ODECLS
516 RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp)
517
518 for(nt=lnt;nt<=unt;++nt)
519 {
520 RSB_MKL_THREADS_TUNING_IDECLS
521 if(nt) mkl_set_num_threads(nt);
522
523 do
524 {
525 errval = rsb__do_mkl_csr_spsv(VA, m, k, nnz, IA, JA, b, c, alphap, betap, transA, typecode, flags);
526 RSB_SAMPLE_STAT(it,ct,dt,tt,bt,wt,ss,tinf,times);
527 }
528 while(RSB_REPEAT(ct-it,times,mint,mintimes,maxt,maxtimes));
529
530 dt = bt;
531 if(dt < best )
532 {
533 otn = nt;
534 best = RSB_MIN_ABOVE_INF(best,dt,tinf);
535 RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp);
536 }
537 rsb__tattr_sets(ttrp,dtn,nt,dt,otn,times);/* FIXME: if no threads tuning, shall set dtpo = btpo, as well as ttrp.optt=0 */
538 if(dtn == nt) RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp+1);
539 }
540 mkl_set_num_threads(ont);
541 done:
542 ttrp->ttt += rsb_time(); /* ttrp->ttt = tt; */
543 tpo = best; /* tpo = 1.0 / best; */
544 *otnp = otn;
545 }
546 else
547 {
548 dt = -rsb_time();
549 errval = rsb__do_mkl_csr_spsv(VA, m, k, nnz, IA, JA, b, c, alphap, betap, transA, typecode, flags);
550 dt += rsb_time();
551 /* tpo = 1.0 / dt; */
552 tpo = dt;
553 }
554 if(tpop)
555 *tpop = tpo;
556 return errval;
557 }
558
rsb__mkl_csr_spsm_bench(const void * VA,const MKL_INT m,const MKL_INT nrhs,const MKL_INT * IA,const MKL_INT * JA,const void * b,void * c,const void * alphap,rsb_trans_t transA,rsb_type_t typecode,rsb_flags_t flags,const MKL_INT ldb,const MKL_INT ldc,rsb_thread_t * otnp,rsb_time_t * tpop,struct rsb_tattr_t * ttrp,struct rsb_ts_t * tstp)559 rsb_err_t rsb__mkl_csr_spsm_bench(const void *VA, const MKL_INT m, const MKL_INT nrhs, const MKL_INT * IA, const MKL_INT *JA, const void * b, void * c, const void *alphap, rsb_trans_t transA, rsb_type_t typecode, rsb_flags_t flags, const MKL_INT ldb, const MKL_INT ldc, rsb_thread_t *otnp, rsb_time_t *tpop, struct rsb_tattr_t* ttrp, struct rsb_ts_t*tstp)
560 {
561 rsb_err_t errval = RSB_ERR_NO_ERROR;
562 rsb_time_t dt, tpo;
563
564 if(otnp)
565 {
566 RSB_MKL_THREADS_TUNING_ODECLS
567 RSB_MKL_SET_THREADS_RANGE(lnt,unt,otnp)
568
569 for(nt=lnt;nt<=unt;++nt)
570 {
571 RSB_MKL_THREADS_TUNING_IDECLS
572 if(nt) mkl_set_num_threads(nt);
573
574 do
575 {
576 errval = rsb__do_mkl_csr_spsm(VA, m, nrhs, IA, JA, b, c, alphap, transA, typecode, flags, ldb, ldc);
577 RSB_SAMPLE_STAT(it,ct,dt,tt,bt,wt,ss,tinf,times);
578 }
579 while(RSB_REPEAT(ct-it,times,mint,mintimes,maxt,maxtimes));
580
581 dt = bt;
582 if(dt < best )
583 {
584 otn = nt;
585 best = RSB_MIN_ABOVE_INF(best,dt,tinf);
586 RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp);
587 }
588 rsb__tattr_sets(ttrp,dtn,nt,dt,otn,times);/* FIXME: if no threads tuning, shall set dtpo = btpo, as well as ttrp.optt=0 */
589 if(dtn == nt) RSB_STAT_TAKE(it,otn,ct,dt,tt,bt,wt,ss,times,tstp+1);
590 }
591 mkl_set_num_threads(ont);
592 done:
593 ttrp->ttt += rsb_time(); /* ttrp->ttt = tt; */
594 tpo = best; /* tpo = 1.0 / best; */
595 *otnp = otn;
596 }
597 else
598 {
599 dt = -rsb_time();
600 errval = rsb__do_mkl_csr_spsm(VA, m, nrhs, IA, JA, b, c, alphap, transA, typecode, flags, ldb, ldc);
601 dt += rsb_time();
602 /* tpo = 1.0 / dt; */
603 tpo = dt;
604 }
605 if(tpop)
606 *tpop = tpo;
607 return errval;
608 }
609
rsb__mkl_coo2csr(const MKL_INT m,const MKL_INT k,const MKL_INT nnz,const void * IVA,const MKL_INT * IIA,const MKL_INT * IJA,const void * OVA,const MKL_INT * OIA,const MKL_INT * OJA,rsb_type_t typecode,const MKL_INT mib)610 rsb_err_t rsb__mkl_coo2csr(const MKL_INT m, const MKL_INT k, const MKL_INT nnz, const void *IVA, const MKL_INT * IIA, const MKL_INT *IJA, const void *OVA, const MKL_INT * OIA, const MKL_INT *OJA, rsb_type_t typecode, const MKL_INT mib)
611 {
612 int info;
613 int job[6];
614 job[0] = 1; // coo2csr (=1, the matrix in the coordinate format is converted to the CSR;=2, the matrix in the coordinate format is converted to the CSR format, and the column indices in CSR representation are sorted in the increasing order within each row.)
615 job[1] = mib; // 0 based csr
616 job[2] = 0; // 0 based coo
617 job[3] = 0; // ignored
618 job[4] = nnz; // ignored here
619 job[5] = 0; // fill all three arrays
620
621 #ifdef RSB_NUMERICAL_TYPE_FLOAT
622 if( typecode == RSB_NUMERICAL_TYPE_FLOAT )
623 mkl_scsrcoo(job,(MKL_INT*)(&m),(float*)OVA,(MKL_INT*)OJA,(MKL_INT*)OIA,(MKL_INT*)(&nnz),(float*)(IVA),(MKL_INT*)IIA,(MKL_INT*)IJA,&info);
624 else
625 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
626 #ifdef RSB_NUMERICAL_TYPE_DOUBLE
627 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE )
628 mkl_dcsrcoo(job,(MKL_INT*)(&m),(double*)OVA,(MKL_INT*)OJA,(MKL_INT*)OIA,(MKL_INT*)(&nnz),(double*)(IVA),(MKL_INT*)IIA,(MKL_INT*)IJA,&info);
629 else
630 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
631 #ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
632 if( typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
633 mkl_ccsrcoo(job,(MKL_INT*)(&m),(MKL_Complex8*)OVA,(MKL_INT*)OJA,(MKL_INT*)OIA,(MKL_INT*)(&nnz),(MKL_Complex8*)(IVA),(MKL_INT*)IIA,(MKL_INT*)IJA,&info);
634 else
635 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
636 #ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
637 if( typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
638 mkl_zcsrcoo(job,(MKL_INT*)(&m),(MKL_Complex16*)OVA,(MKL_INT*)OJA,(MKL_INT*)OIA,(MKL_INT*)(&nnz),(MKL_Complex16*)(IVA),(MKL_INT*)IIA,(MKL_INT*)IJA,&info);
639 else
640 #endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
641 return RSB_ERR_UNSUPPORTED_TYPE ;
642 return RSB_ERR_NO_ERROR;
643 }
644
645 #endif /* RSB_WANT_MKL */
646 /* @endcond */
647
648