dnl
dnl
dnl	@author: Michele Martone
dnl
/* @cond INNERDOC */
/*!
 @file
 @brief

 Performance info gathering code. (OBSOLETE)
 */
dnl
include(`rsb_misc.m4')dnl
dnl
RSB_M4_HEADER_MESSAGE()dnl
dnl
ifdef(`ONLY_WANT_HEADERS',`
#ifndef RSB_BENCH_H_INCLUDED
#define RSB_BENCH_H_INCLUDED
')
dnl
include(`do_unroll.m4')dnl
include(`rsb_krnl_vb_macros.m4')dnl
include(`rsb_krnl_macros.m4')dnl
dnl
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
dnl
#include "rsb_internals.h"
dnl
#ifdef RSB_HAVE_CBLAS_H
#include <cblas.h>
#endif /* RSB_HAVE_CBLAS_H */
#ifdef RSB_HAVE_CLAPACK_H
#include <clapack.h>
#endif /* RSB_HAVE_CLAPACK_H */
#include <math.h>
dnl
dnl
dnl
dnl	RSB_M4_HYPERBOLIC_FITTING_FUNCTION_ARGS()
dnl	---------------------------------
dnl	Expands to the quoted C argument list of the hyperbolic fitting function.
dnl
define(`RSB_M4_HYPERBOLIC_FITTING_FUNCTION_ARGS',`dnl
dnl
`(double x[], double y[], size_t nb_loop, double * a, double * b, double *c, double c_s)'dnl
dnl
')dnl
dnl
dnl
dnl	RSB_M4_HYPERBOLIC_FITTING_FUNCTION_IDENTIFIER()
dnl	---------------------------------
dnl	Expands to the C identifier of the hyperbolic fitting function.
dnl
define(`RSB_M4_HYPERBOLIC_FITTING_FUNCTION_IDENTIFIER',`dnl
dnl
`rsb_fit_hyp'dnl
dnl
')dnl
dnl
dnl
dnl
dnl	RSB_M4_HYPERBOLIC_FITTING_FUNCTION()
dnl	---------------------------------
dnl	Expands to the declaration (with ONLY_WANT_HEADERS) or full definition
dnl	of the hyperbolic performance-model fitting function.
dnl
define(`RSB_M4_HYPERBOLIC_FITTING_FUNCTION',`dnl
dnl
rsb_err_t RSB_M4_HYPERBOLIC_FITTING_FUNCTION_IDENTIFIER()`'dnl
RSB_M4_HYPERBOLIC_FITTING_FUNCTION_ARGS()`'dnl
ifdef(`ONLY_WANT_HEADERS',`;
',`
{
#if !(RSB_HAVE_CLAPACK && RSB_HAVE_CBLAS)
	return RSB_ERR_UNSUPPORTED_OPERATION;
#else
	/**
	 * \ingroup gr_bench
	 * Note :
	 *
	 * This function will compute a performance predictor based on
	 * nonzero per row ratio, by fitting the two input x (non zeros per row)
	 * and y (megaflops) vectors (both with n = RSB_FITTING_SAMPLES points) to
	 * the following formula :
	 *
	 * `megaflops (nnz_per_row) a + b / ( c + nnz_per_row )'
	 *
	 * The c_s and nb_loop arguments will be documented some day.
	 *
	 * This model is discussed in the following article :

@article{ButtEijkLang:spmvp,
 title = {Performance Optimization and Modeling of Blocked Sparse Kernels},
 author = {Buttari, Alfredo and Eijkhout, Victor and Langou, Julien and Filippone, Salvatore},
 pages = {467--484},
 year = 2007,
 journal = {IJHPCA},
 volume = 21,
 url = {\url{{http://www.tacc.utexas.edu/~eijkhout/Articles/2007-buttari-spmvp.pdf}}}
}
	 *
	 */

	rsb_int nparms=3;	/* NOTE(review): unused, kept as found */
	rsb_int n = RSB_FITTING_SAMPLES;
	/* Fortran arrays */
#define RSB_FORTRAN_ARRAY(AI,ROWS,COLS) AI[(ROWS)*(COLS)]

	rsb_int nj = 3;
	rsb_int i,j;
	rsb_err_t errval = RSB_ERR_NO_ERROR;

	double RSB_FORTRAN_ARRAY(G ,n,3);
	double RSB_FORTRAN_ARRAY(G1,n,3);
	double RSB_FORTRAN_ARRAY(GG,3,3);
	double RSB_FORTRAN_ARRAY(z ,n,1);
	double RSB_FORTRAN_ARRAY(z0,n,1);
	double RSB_FORTRAN_ARRAY(dy,n,1);
	double RSB_FORTRAN_ARRAY(ddy,3,1);
	double RSB_FORTRAN_ARRAY(xj ,nj,1);
	double RSB_FORTRAN_ARRAY(yj ,nj,1);
	double RSB_FORTRAN_ARRAY(zj ,nj,1);

	double xcpy[n];
	double a_t,b_t,sum1,sum2,sum3,sum4,error,tmp_a,tmp_b,tmp_c, min_err,max,min,avg,intl;
	int /*i,*/info,ipivot[3],/*nj,j,*/k,cnt;
	rsb__memcpy(xcpy,x,sizeof(xcpy)); /* not a bit more .. and please note that sizeof(x)=sizeof(double*) != sizeof(x[n])*/


	RSB_INFO("starting analysis...\n");
	RSB_STDOUT("\n");
	RSB_STDOUT("performance data:\n");
	for(i=0;i<n;++i)
	{
		RSB_STDOUT("%lg %lg\n",xcpy[i],y[i]);
	}

	sum1=0;
	sum2=0;
	sum3=0;
	sum4=0;


	/* initial guess : the asymptote a is taken from the last sample */
	*a=y[n-1];

	rsb__memcpy(xj,x,sizeof(xj)); /* not a bit more */
	rsb__memcpy(yj,y,sizeof(yj)); /* not a bit more */

	for(i=0;i<nj;++i)
	{
		zj[i]=yj[i]-*a;
		zj[i]=1/zj[i];
	}

	for(i=0;i<nj;++i)
	{
		sum1=sum1 + xj[i]*zj[i];
		sum2=sum2 + xj[i];
		sum3=sum3 + zj[i];
		sum4=sum4 + xj[i]*xj[i];
	}

	/* least squares line through (xj,zj) yields starting values for b and c */
	a_t= (sum3*sum4-sum2*sum1)/(nj*sum4-sum2*sum2);
	b_t=(nj*sum1 - sum2*sum3) / (nj*sum4 - sum2*sum2);

	*b=1/b_t;
	*c=a_t* *b;

	for(i=0;i<n;++i)
		z0[i]= *a +*b/(x[i]+*c);

	error = 0;
	for(j=0;j<n;++j)
		error = error + (fabs( z0[j] - y[j] ) / y[j] );

	error = error / n * 100;	/* mean relative error, in percent */

	min_err=error;

	tmp_a=*a;
	tmp_b=*b;
	tmp_c=*c;

	/* Gauss-Newton style refinement of (a,b,c), nb_loop iterations */
	for(i=0;i<nb_loop;++i)
	{
		for(j=0;j<n;++j)
			dy[j] = z0[j]-y[j];

		/* G : n x 3 Jacobian (column major) of the model w.r.t. (a,b,c) */
		for(j=0;j<n;++j)
		{
			G[j+0*n]=1;
			G[j+1*n]=1/(x[j]+tmp_c);
			G[j+2*n]=-tmp_b/( (x[j]+tmp_c)*(x[j]+tmp_c) );

			G1[j+0*n]= G[j+0*n];
			G1[j+1*n]= G[j+1*n];
			G1[j+2*n]= G[j+2*n];
		}

/* NOTE(review): the following cpp condition was empty in the source (a preprocessing error); restored to match the #else and #endif trailing comments, and redundantly true inside this branch */
#if (RSB_HAVE_CLAPACK && RSB_HAVE_CBLAS)
		/* solve the 3x3 normal equations (Gt G) ddy = Gt dy via LU factorization */
		cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,3,3,n,1.0,G,n,G1,n,0.0,GG,3);
		errval = clapack_dgetrf(CblasColMajor,3,3,GG,3,ipivot);
		if(RSB_SOME_ERROR(errval)) goto err;
		cblas_dgemv(CblasColMajor,CblasTrans,n,3,1.0,G,n,dy,1,0.0,ddy,1);
		errval = clapack_dgetrs(CblasColMajor,CblasNoTrans,3,1,GG,3,ipivot,ddy,3);
		if(RSB_SOME_ERROR(errval)) goto err;
#else /* (RSB_HAVE_CLAPACK && RSB_HAVE_CBLAS) */
#endif /* (RSB_HAVE_CLAPACK && RSB_HAVE_CBLAS) */

		tmp_a = tmp_a-ddy[1-1];
		tmp_b = tmp_b-ddy[2-1];
		tmp_c = tmp_c-ddy[3-1];

		for(j=0;j<n;++j)
			z0[j]= tmp_a +tmp_b/(x[j]+tmp_c);

		error = 0;
		for(j=0;j<n;++j)
			error = error + (fabs( z0[j] - y[j] ) / y[j] );

		error = error / n * 100;
		/* NOTE(review): min_err is never updated below, so every iterate is
		   compared against the initial error only -- confirm intent */
		if(error < min_err)
		{
			*a=tmp_a;
			*b=tmp_b;
			*c=tmp_c;
		}
	}

	/* degenerate fit : fall back to a flat predictor around an average */
	if((*c< 0) && (*c < c_s))
	{
		*c=10000;
		*b=10000;
		avg=0;
		max=y[0];
		min=y[0];
		for(i=0;i<n;++i)
		{
			if (y[i] > max) max=y[i];
			if (y[i] < min) min=y[i];
			avg=avg+y[i];
		}
		avg=avg/(double)(n);
		*a=avg;
		intl=max-min;
		avg=0;
		cnt=0;
		/* NOTE(review): i equals n here, so this loop body never runs and cnt
		   stays 0; the re-initialization of i is commented out -- confirm intent */
		for(/*i=0*/;i<n;++i)
		//for(i=0;i<n;++i)
		{
			if (fabs(y[i]-avg) < (0.3*intl))
			{
				avg = avg + y[i];
				cnt=cnt+1;
			}
		}
		if(cnt > 0) *a=avg/(double)cnt;
	}
	else
	if (*b >= 0)
	{
		*c=10000;
		*b=10000;
		avg=0;
		max=y[0];
		min=y[0];
		for(i=0;i<n;++i)
		{
			if (y[i] > max) max=y[i];
			if (y[i] < min) min=y[i];
			avg=avg+y[i];
		}
		avg=avg/(double)n;
		intl=max-min;
		avg=0;
		cnt=0;
		//for(i=0;i<n;++i)
		/* NOTE(review): same as above, i equals n so the loop body never runs */
		for(/*i=0*/;i<n;++i)
		{
			if (fabs(y[i]-avg) < (0.3*intl))
			{
				avg = avg + y[i];
				cnt=cnt+1;
			}
		}
		if(cnt > 0) *a=avg/ (double) cnt;
	}


	RSB_STDOUT("\n");
	RSB_STDOUT("alpha:%lg beta:%lg gamma:%lg\n",*a,*b,*c);

	RSB_STDOUT("\nfitting:\n");
	for(i=0;i<n;++i)
	{
		RSB_STDOUT("%lg %lg\n", xcpy[i], *a+*b/(xcpy[i]+*c));
	}

	return RSB_ERR_NO_ERROR;
	err:
	RSB_DO_ERR_RETURN(errval)
#endif /* RSB_HAVE_CLAPACK && RSB_HAVE_CBLAS */
}
')dnl
')dnl
dnl
dnl
dnl
dnl
dnl	RSB_M4_REFERENCEBENCHMARK_FUNCTION_ARGS()
dnl	------------------------------------------------------------------
dnl	Expands to the quoted C argument list of the reference benchmark function.
dnl
define(`RSB_M4_REFERENCEBENCHMARK_FUNCTION_ARGS',`dnl
dnl
`(void)'dnl
dnl
')dnl
dnl
dnl
dnl
dnl	RSB_M4_REFERENCEBENCHMARK_FUNCTION_IDENTIFIER()
dnl	--------------------------------------------
dnl	Expands to the C identifier of the reference benchmark function.
dnl
define(`RSB_M4_REFERENCEBENCHMARK_FUNCTION_IDENTIFIER',`dnl
dnl
`rsb__do_referencebenchmark'dnl
dnl
dnl
')dnl
dnl
dnl
dnl
dnl	RSB_M4_REFERENCEBENCHMARK_FUNCTION_NAME()
dnl	--------------------------------------
dnl	Expands to the return type and name of the reference benchmark function.
dnl
define(`RSB_M4_REFERENCEBENCHMARK_FUNCTION_NAME',`dnl
dnl
rsb_err_t RSB_M4_REFERENCEBENCHMARK_FUNCTION_IDENTIFIER`'dnl
dnl
dnl
')dnl
dnl
dnl
dnl
dnl	RSB_M4_REFERENCEBENCHMARK_FUNCTION()
dnl	---------------------------------
dnl	Expands to the declaration (with ONLY_WANT_HEADERS) or full definition
dnl	of the reference benchmark driver, instantiated over all types and ops.
dnl
define(`RSB_M4_REFERENCEBENCHMARK_FUNCTION',`dnl
dnl
RSB_M4_REFERENCEBENCHMARK_FUNCTION_NAME`'dnl
RSB_M4_REFERENCEBENCHMARK_FUNCTION_ARGS`'dnl
ifdef(`ONLY_WANT_HEADERS',`;
',`
{
	/*!
	 * \ingroup gr_bench
	 * A complete benchmark program.
	 * Will benchmark all supported matrix operations over all supported types
	 * over all supported matrix partitionings.
	 *
	 * Moreover, it WILL perform analysis of performance data and results dumput.
	 *
	 * \return \rsb_errval_inp_param_msg
	 *
	 * FIXME : UNFINISHED: should process and dump this info in a header file.
	 */
	struct rsb_global_reference_performance_info_t grpi;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_blk_idx_t ri,ci;	/* row index, columns index */
	rsb_coo_idx_t order=20000;
	rsb_coo_idx_t rows=order,cols=order;	/* FIXME : TEMPORARY */
	rsb_blk_idx_t rua[] = RSB_ROWS_UNROLL_ARRAY;
	rsb_blk_idx_t cua[] = RSB_COLUMNS_UNROLL_ARRAY;
	double tot_secs=0.0,pred_secs=1.0;
	rsb_trans_t transA = RSB_DEFAULT_TRANSPOSITION;	/* NOTE(review): unused, kept as found */
	size_t kernels_n = RSB_ROWS_UNROLL_ARRAY_LENGTH*RSB_COLUMNS_UNROLL_ARRAY_LENGTH*RSB_IMPLEMENTED_MOPS*RSB_IMPLEMENTED_TYPES;
	rsb_int ti=0;	/* type index */
	int fbw,bwi;
	RSB_BZERO_P(&grpi);

	/* if((errval = rsb_lib_init(RSB_NULL_INIT_OPTIONS))){goto err;} we skip this to enable calling this from within our library (FIXME) */

	/* choose the first bandwidth sample (fbw) and the bandwidth increment (bwi) */
	if(RSB_FITTING_SAMPLES<2)
	{
		fbw=(RSB_FIRST_FITTING_SAMPLE_BW_MAX + RSB_FIRST_FITTING_SAMPLE_BW_MIN)/2;
		bwi=fbw;
	}
	else
	{
		fbw = RSB_FIRST_FITTING_SAMPLE_BW_MIN;
		bwi=(RSB_FIRST_FITTING_SAMPLE_BW_MAX - RSB_FIRST_FITTING_SAMPLE_BW_MIN)/(RSB_FITTING_SAMPLES-1);
	}

	tot_secs = -rsb_time();
	pred_secs *= RSB_ROWS_UNROLL_ARRAY_LENGTH * RSB_COLUMNS_UNROLL_ARRAY_LENGTH * RSB_FITTING_SAMPLES * RSB_IMPLEMENTED_META_MOPS * RSB_IMPLEMENTED_TYPES * RSB_BENCHMARK_MIN_SECONDS;
	/* NOTE(review): conversion was %zd; kernels_n is size_t so %zu matches */
	RSB_STDERR("#reference benchmarking of %zu kernels (no transposed, no symmetric, and so on) should take at least %lg seconds..\n",kernels_n,pred_secs);

foreach(`mtype',RSB_M4_MATRIX_TYPES,`dnl
	/* mtype type benchmarking */
/*	RSB_INFO("#mtype type benchmarking\n");*/
	for(ri=0;ri<RSB_ROWS_UNROLL_ARRAY_LENGTH;++ri)
	{
		for(ci=0;ci<RSB_COLUMNS_UNROLL_ARRAY_LENGTH;++ci)
		{
			rsb_blk_idx_t br = rua[ri];
			rsb_blk_idx_t bc = cua[ci];
			rsb_coo_idx_t bw,mbw=(cols/bc);
			rsb_int si=0;	/* sample index */
			mbw=(cols-bc)/bc;	/* tune here to fill further our matrix */
			/* FIXME : there is the danger of empty samples! */
			for(bw=fbw;bw<=mbw && si< RSB_FITTING_SAMPLES ;bw+=bwi)	/* this parameter should be tunable, too */
			{
				//RSB_INFO("bw = %d\n",bw);
				rsb_int moi=0;	/* matrix operation index */
				double time,*timep=&time;
				struct rsb_mtx_t * mtxAp =
					rsb__generate_blocked_banded(br,bc,rows,cols,bw,timep,RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype),RSB_BOOL_TRUE );	/* FIXME : generating triangular factors always ! */
				if(!mtxAp)
				{
					RSB_STDERR(RSB_ERRM_IE);
					{errval = RSB_ERR_GENERIC_ERROR; goto err;}
				}
dnl				struct rsb_options_t * o = mtxAp->options;

foreach(`mop',RSB_M4_MATRIX_META_OPS,`dnl
			{
/*				RSB_INFO("#mtype type, ");*/
/*				RSB_INFO("mop operation benchmarking\n");*/
				/* mop operation benchmarking */
ifelse(RSB_M4_IS_SPXX_TWO_VECTORS_OPERATING_KERNEL_MOP(mop),1,`dnl
				mtype *out=NULL,*rhs=NULL;
')dnl
ifelse(RSB_M4_IS_SPXX_OP_SCALING_KERNEL_MOP(mop),1,`dnl
				double alpha=1.0;/* FIXME */
				double * alphap = &alpha;
')dnl
ifelse(RSB_M4_IS_SPXX_SCALING_KERNEL_MOP(mop),1,`dnl
				double beta =1.0;/* FIXME */
				double * betap = &beta ;
')dnl
ifelse(RSB_M4_IS_ACC_WRITING_KERNEL_MOP(mop),`1',`dnl
				mtype * row_sums;
')dnl


ifelse(RSB_M4_IS_ACC_WRITING_KERNEL_MOP(mop),`1',`dnl
				row_sums = rsb__malloc(mtxAp->el_size*(rows+br));
				if(!row_sums) {errval = RSB_ERR_ENOMEM;goto erri_`'RSB_M4_CHOPSPACES(mtype)`'`_'`'mop;}
				if(rsb__fill_with_ones(row_sums,mtxAp->typecode,cols,1)) {errval = RSB_ERR_ENOMEM;goto erri_`'RSB_M4_CHOPSPACES(mtype)`'`_'`'mop;}
')dnl
ifelse(mop,`scale',`dnl
				mtype * scale_factors = rsb__malloc(mtxAp->el_size*(rows+br));
				if(!scale_factors) {errval = RSB_ERR_ENOMEM;goto erri_`'RSB_M4_CHOPSPACES(mtype)`'`_'`'mop;}
				if(rsb__fill_with_ones(scale_factors,mtxAp->typecode,rows,1)) {errval = RSB_ERR_ENOMEM;goto erri_`'RSB_M4_CHOPSPACES(mtype)`'`_'`'mop;}
')dnl
ifelse(RSB_M4_IS_ACC_WRITING_KERNEL_MOP(mop),`1',`dnl
')dnl
ifelse(RSB_M4_IS_SPXX_TWO_VECTORS_OPERATING_KERNEL_MOP(mop),1,`dnl
				rsb_coo_idx_t nrhs=4;
				rsb_coo_idx_t bstride = cols+bc;
				rsb_coo_idx_t cstride = rows+br;
ifelse(RSB_M4_IS_STRIDED_KERNEL_MOP(mop),1,`dnl
				rsb_coo_idx_t incx=1,incy=1;
',`dnl
				rsb_coo_idx_t incx=1,incy=1;
')dnl
				incx=1,incy=1;	/* this is just a pacifier for "unused variable"-like warnings */
				rhs = rsb__malloc(mtxAp->el_size*(bstride)*nrhs);
				out = rsb__malloc(mtxAp->el_size*(cstride)*nrhs);
				if(!out || rsb__fill_with_ones(out,mtxAp->typecode,cstride*nrhs,1)){errval = RSB_ERR_ENOMEM;goto erri_`'RSB_M4_CHOPSPACES(mtype)`'`_'`'mop;}
				if(!rhs || rsb__fill_with_ones(rhs,mtxAp->typecode,bstride*nrhs,1)){errval = RSB_ERR_ENOMEM;goto erri_`'RSB_M4_CHOPSPACES(mtype)`'`_'`'mop;}
')dnl
ifelse(mop,`negation',`dnl
				int please_fix_RSB_M4_ARGS_TO_ACTUAL_ARGS=-1;/* here to fix negation */
')dnl

				grpi.gpi[ti].pipmo[moi].blocks_per_row[si]=bw*bc;	/* FIXME : TEMPORARY !! */

				/* we benchmark our mtype library implementation for operation mop */
				grpi.gpi[ti].pipmo[moi].pipfs[si].seconds[ri][ci] = RSB_BENCHMARK_MIN_SECONDS;	/* min seconds */
				grpi.gpi[ti].pipmo[moi].pipfs[si].m_flops[ri][ci] = (double)RSB_BENCHMARK_MIN_RUNS;	/* min runs */

				errval = dnl
ifelse(RSB_M4_MATRIX_OP_IS_META_OP(mop),`1',dnl
`0;/* meta-op : we already measured matrix creation time */
grpi.gpi[ti].pipmo[moi].pipfs[si].seconds[ri][ci]=time;
grpi.gpi[ti].pipmo[moi].pipfs[si].m_flops[ri][ci]=((double)rsb__do_get_matrix_nnz(mtxAp))/1000000;
/* FIXME : this is experimental and unfinished code */
',`
RSB_M4_DIRECT_KERNEL_DISPATCH_BENCHMARK_FUNCTION_IDENTIFIER(mop,mtype)(dnl
&(grpi.gpi[ti].pipmo[moi].pipfs[si].seconds[ri][ci]),dnl
&(grpi.gpi[ti].pipmo[moi].pipfs[si].m_flops[ri][ci]),dnl
RSB_M4_DIRECT_KERNEL_DISPATCH_TIMING_FUNCTION_ACTUAL_ARGS(mop,mtype));')

				grpi.gpi[ti].pipmo[moi].pipfs[si].fillin[ri][ci] = rsb__do_get_matrix_fillin(mtxAp);
				grpi.gpi[ti].pipmo[moi].pipfs[si].rows = rows;
				grpi.gpi[ti].pipmo[moi].pipfs[si].cols = cols;
				grpi.gpi[ti].pipmo[moi].pipfs[si].nnz = rsb__do_get_matrix_nnz(mtxAp) ;
				grpi.gpi[ti].pipmo[moi].pipfs[si].flags= mtxAp->flags ;
				grpi.gpi[ti].pipmo[moi].pipfs[si].storage= mtxAp->matrix_storage ;
				grpi.gpi[ti].pipmo[moi].pipfs[si].typecode= mtxAp->typecode ;
				grpi.gpi[ti].pipmo[moi].pipfs[si].element_count= mtxAp->element_count;

				grpi.gpi[ti].pipmo[moi].pipfs[si].e_mflops[ri][ci] =
					grpi.gpi[ti].pipmo[moi].pipfs[si].m_flops[ri][ci] /
					grpi.gpi[ti].pipmo[moi].pipfs[si].fillin[ri][ci];

				if(RSB_SOME_ERROR(errval)){goto erri_`'RSB_M4_CHOPSPACES(mtype)`'`_'`'mop;}
				++moi;

				erri_`'RSB_M4_CHOPSPACES(mtype)`'`_'`'mop:
				if(RSB_SOME_ERROR(errval))goto err;

				RSB_NULL_STATEMENT_FOR_COMPILER_HAPPINESS
ifelse(RSB_M4_IS_ACC_WRITING_KERNEL_MOP(mop),`1',`dnl
				RSB_CONDITIONAL_FREE(row_sums);
')dnl
ifelse(RSB_M4_IS_SPXX_TWO_VECTORS_OPERATING_KERNEL_MOP(mop),1,`dnl
				RSB_CONDITIONAL_FREE(out);
				RSB_CONDITIONAL_FREE(rhs);
')dnl
ifelse(mop,`scale',`dnl
				RSB_CONDITIONAL_FREE(scale_factors);
')dnl
			}
')dnl
			RSB_MTX_FREE(mtxAp);
			++si;
			}
		}
	}
	{
		rsb_int moi;
		rsb_char_t * mops[] = RSB_M4_MATRIX_META_OPS_ARRAY;
		rsb_char_t * types[] = RSB_M4_MATRIX_TYPES_ARRAY;
		rsb_char_t s[RSB_M4_BUFLEN];
		rsb__print_mop_reference_performance_info_header();
		for(moi=0;moi<RSB_IMPLEMENTED_META_MOPS;++moi)
		{
/*			rsb_int si;*/
			/* informational printout */
			sprintf(s,"%s\t%s\t",types[ti], mops[moi]);
			rsb__print_mop_reference_performance_info(&(grpi.gpi[ti].pipmo[moi]),s);
/*			for(si=0;si<RSB_FITTING_SAMPLES;++si)*/
/*				rsb__dump_performance_info(&(grpi.gpi[ti].pipmo[moi].pipfs[si]), NULL);*/
		}
	}
	++ti;
')dnl
	tot_secs += rsb_time();
	RSB_STDERR("#reference benchmarking took %lg seconds (predicted %lg :)....\n",tot_secs,pred_secs);

	grpi.initialized=1;	/* FIXME : only partially */
	//rsb__dump_global_reference_performance_info(&grpi);
#if RSB_WANT_PERFORMANCE_FILE
	rsb__save_global_reference_performance_info(&grpi);
#endif /* RSB_WANT_PERFORMANCE_FILE */
	return RSB_ERR_NO_ERROR;	/* FIXME : temporary */

	/* NOTE(review): the fitting analysis below is unreachable because of the
	   return statement above; kept as found */
	ti=0;	/* type index */
	for(ti=0;ti<RSB_IMPLEMENTED_TYPES ;++ti)
	for(ri=0;ri<RSB_ROWS_UNROLL_ARRAY_LENGTH;++ri)
	{
		for(ci=0;ci<RSB_COLUMNS_UNROLL_ARRAY_LENGTH;++ci)
		{
			rsb_blk_idx_t bc = cua[ci];
			rsb_int moi=0;	/* matrix operation index */
			for(moi=0;moi<RSB_IMPLEMENTED_META_MOPS ;++moi)
			{
				rsb_int si=0;	/* sample index */

				double y[RSB_FITTING_SAMPLES];
				double * x = grpi.gpi[ti].pipmo[moi].blocks_per_row;

				for(si=0;si< RSB_FITTING_SAMPLES ;++si)
				{
					/* we tune our mtype library implementation for operation mop */
					y[si] =
						grpi.gpi[ti].pipmo[moi].pipfs[si].m_flops[ri][ci]/
						grpi.gpi[ti].pipmo[moi].pipfs[si].seconds[ri][ci];
				}

				/*
				 * FIXME : make this fitting analysis offline respect our benchmark!
				 */
				errval = RSB_M4_HYPERBOLIC_FITTING_FUNCTION_IDENTIFIER()(
					x, y, 3,
					&(grpi.gpi[ti].pipmo[moi].alpha[ri][ci]),
					&(grpi.gpi[ti].pipmo[moi].beta [ri][ci]),
					&(grpi.gpi[ti].pipmo[moi].gamma[ri][ci]), (double)bc
					/* FIXME : is this right ?*/
					);
				if(RSB_SOME_ERROR(errval))goto err;
			}
		}
	}

	if( rsb_lib_exit(RSB_NULL_EXIT_OPTIONS) )
		return RSB_ERR_INTERNAL_ERROR;

	return RSB_ERR_NO_ERROR;
err:
	RSB_DO_ERR_RETURN(errval)
}
')dnl
')dnl
dnl
dnl
dnl
dnl
dnl
RSB_M4_HYPERBOLIC_FITTING_FUNCTION()
RSB_M4_REFERENCEBENCHMARK_FUNCTION()
dnl
dnl
dnl
dnl
dnl
dnl
dnl
#ifdef __cplusplus
}
#endif /* __cplusplus */
dnl
dnl
ifdef(`ONLY_WANT_HEADERS',`
#endif /* RSB_BENCH_H_INCLUDED */
')
/* @endcond */
dnl