/*
 * Coarsened kernel, instantiated once per value of the `ndim` macro (the
 * kernel name is built with CON(fvinomatchg32_coars_kernel_, ndim)).
 *
 * blockIdx.x is decoded as a mixed-radix number whose digit i has radix
 * idx_s[i], for i in [0, ndim). Each digit is multiplied by lda_s[i] and
 * ldb_s[i] and accumulated to obtain this block's element offsets into A
 * and B. Digits 0 and 1 are always decoded, so idx_s, lda_s and ldb_s must
 * hold at least two entries even when ndim < 2.
 *
 * Digits 0 and 1 additionally select which device helper processes the block:
 *   digit0 <  inputrem && digit1 <  outputrem -> fvinomatchg32_main_coars
 *   digit0 >= inputrem && digit1 <  outputrem -> rem_coars(remainder1, 32)
 *   digit0 <  inputrem && digit1 >= outputrem -> rem_coars(32, remainder2)
 *   otherwise                                 -> rem_coars(remainder1, remainder2)
 * NOTE(review): the literal 32 is presumably the full tile extent and
 * remainder1/remainder2 the partial extents of edge tiles -- confirm against
 * the helper definitions, which are not visible here.
 *
 * Parameters:
 *   A, B                      input / output buffers of element type `type`
 *   lda_s, ldb_s              per-digit multipliers for the A / B offsets
 *   idx_s                     per-digit radices used to decode blockIdx.x
 *   acoars, bcoars, size      forwarded unchanged to the helpers
 *   inputrem, outputrem       thresholds compared against digits 0 and 1
 *   lda_kernel1, ldb_kernel1  forwarded unchanged to the helpers
 *   remainder1, remainder2    extents passed to the remainder helper
 *
 * Only blockIdx.x is consulted; all offsets are computed in 32-bit int
 * arithmetic.
 */
__global__ void CON(fvinomatchg32_coars_kernel_, ndim) (const type * A, type * B,
        const int * __restrict__ lda_s, const int* __restrict__ ldb_s, const int* __restrict__ idx_s,
        const int acoars, const int bcoars,
        const int inputrem, const int outputrem, const int lda_kernel1, const int ldb_kernel1,
        const int remainder1, const int remainder2, const int size)
{
    int aexpr = 0, bexpr = 0;
    int idx = blockIdx.x;

    // Digit 0: selects between the main and remainder path on the input side.
    int radix = idx_s[0];
    int quotient = idx / radix;
    const int ii1 = idx - quotient * radix;
    aexpr += ii1 * lda_s[0];
    bexpr += ii1 * ldb_s[0];
    idx = quotient;

    // Digit 1: selects between the main and remainder path on the output side.
    radix = idx_s[1];
    quotient = idx / radix;
    const int iip2 = idx - quotient * radix;
    aexpr += iip2 * lda_s[1];
    bexpr += iip2 * ldb_s[1];
    idx = quotient;

    // Remaining digits only contribute to the base offsets.
    for (int i = 2; i < ndim; i++)
    {
        radix = idx_s[i];
        quotient = idx / radix;
        const int digit = idx - quotient * radix;
        aexpr += digit * lda_s[i];
        bexpr += digit * ldb_s[i];
        idx = quotient;
    }

    // Use the kernel's element type rather than a hard-coded double so the
    // pointers agree with the A/B parameter types for every `type`.
    const type *Atmp = A + aexpr;
    type *Btmp = B + bexpr;

    const bool inMain = ii1 < inputrem;
    const bool outMain = iip2 < outputrem;

    // The four calls are kept separate so that the literal 32 remains a
    // compile-time constant at each call site.
    if (inMain && outMain)
    {
        fvinomatchg32_main_coars(Atmp, Btmp, lda_kernel1, ldb_kernel1, acoars, bcoars, size);
    }
    else if (outMain)
    {
        // digit 0 is at or beyond inputrem, digit 1 is below outputrem
        fvinomatchg32_rem_coars(Atmp, Btmp, lda_kernel1, ldb_kernel1, remainder1, 32, acoars, bcoars, size);
    }
    else if (inMain)
    {
        // digit 1 is at or beyond outputrem, digit 0 is below inputrem
        fvinomatchg32_rem_coars(Atmp, Btmp, lda_kernel1, ldb_kernel1, 32, remainder2, acoars, bcoars, size);
    }
    else
    {
        // both digits are at or beyond their thresholds
        fvinomatchg32_rem_coars(Atmp, Btmp, lda_kernel1, ldb_kernel1, remainder1, remainder2, acoars, bcoars, size);
    }
}
#undef ndim
55
56