/*
 * Per-block dispatch kernel for the fvinomatchg32 "coars" path.
 *
 * `CON`, `type` and `ndim` are macros supplied by the including code (not
 * visible in this section); `ndim` is undefined again at the bottom of this
 * block. NOTE(review): presumably CON token-pastes the name with ndim so one
 * kernel is generated per dimensionality -- confirm against its definition.
 *
 * Work decomposition (derived only from blockIdx.x, so every thread of a block
 * computes the same values and takes the same branch; threadIdx is not used
 * here):
 *   - blockIdx.x is decoded as a mixed-radix number whose i-th radix is
 *     idx_s[i]. Digit i is scaled by lda_s[i] / ldb_s[i] and accumulated into
 *     the element offsets applied to A and B.
 *   - Digit 0 is compared with inputrem and digit 1 with outputrem to choose
 *     between the full-tile routine and the remainder routine.
 *
 * Parameters:
 *   A, B                     source / destination base pointers.
 *   lda_s, ldb_s             per-dimension offset multipliers for A and B.
 *   idx_s                    per-dimension radix used to decode blockIdx.x.
 *   acoars, bcoars, size     forwarded unchanged to the device helpers.
 *   lda_kernel1, ldb_kernel1 forwarded unchanged to the device helpers.
 *   inputrem, outputrem      thresholds for digit 0 / digit 1; a digit at or
 *                            above its threshold selects the remainder routine.
 *   remainder1, remainder2   extents handed to the remainder routine in place
 *                            of 32 for the dimension(s) that crossed the
 *                            threshold.
 *
 * Preconditions visible from the code:
 *   - idx_s, lda_s and ldb_s hold at least max(2, ndim) entries (entries 0 and
 *     1 are always read).
 *   - every idx_s[i] that is read is non-zero (it is used as a divisor).
 * NOTE(review): the grid is presumably launched with gridDim.x equal to the
 * product of idx_s[0..ndim-1] -- confirm at the launch site.
 */
__global__ void CON(fvinomatchg32_coars_kernel_, ndim)  (const type *   A, type * B,
		const int * __restrict__ lda_s, const int* __restrict__ ldb_s, const int* __restrict__ idx_s
		,const int acoars, const int bcoars
		, const int inputrem, const int outputrem, const int lda_kernel1, const int ldb_kernel1, const int remainder1, const int remainder2, const int size)
{
	int aexpr = 0, bexpr = 0;
	int idx = blockIdx.x;

	// Dimension 0: peel the lowest digit off the block index.
	int tmp = idx / idx_s[0];
	int index = idx - tmp * idx_s[0];	// idx % idx_s[0], reusing the quotient
	aexpr += index * lda_s[0];
	bexpr += index * ldb_s[0];
	idx = tmp;
	const int ii1 = index;

	// Dimension 1: next digit.
	tmp = idx / idx_s[1];
	index = idx - tmp * idx_s[1];
	aexpr += index * lda_s[1];
	bexpr += index * ldb_s[1];
	idx = tmp;
	const int iip2 = index;

	// Remaining dimensions only contribute to the offsets.
	for(int i = 2; i < ndim; i++)
	{
		tmp = idx / idx_s[i];
		index = idx - tmp * idx_s[i];
		aexpr += index * lda_s[i];
		bexpr += index * ldb_s[i];
		idx = tmp;
	}

	// Use the element-type macro rather than a hard-coded double so the
	// pointer types always agree with the kernel's A/B parameters.
	const type *Atmp = A + aexpr;
	type *Btmp = B + bexpr;

	const bool inFull = ii1 < inputrem;
	const bool outFull = iip2 < outputrem;

	// The literal 32 is kept at each call site (rather than selected at run
	// time) so the helper still sees a compile-time constant.
	// NOTE(review): 32 is presumably the full tile edge -- confirm in the helper.
	if(inFull && outFull)
	{
		fvinomatchg32_main_coars(Atmp,Btmp, lda_kernel1, ldb_kernel1, acoars, bcoars, size );
	}
	else if(outFull)
	{
		// Only digit 0 reached its threshold.
		fvinomatchg32_rem_coars(Atmp,Btmp, lda_kernel1, ldb_kernel1 ,remainder1, 32, acoars, bcoars, size);
	}
	else if(inFull)
	{
		// Only digit 1 reached its threshold.
		fvinomatchg32_rem_coars(Atmp,Btmp, lda_kernel1, ldb_kernel1, 32, remainder2, acoars, bcoars, size);
	}
	else
	{
		// Both digits reached their thresholds.
		fvinomatchg32_rem_coars(Atmp,Btmp,lda_kernel1, ldb_kernel1,remainder1,remainder2,acoars, bcoars, size);
	}
}
#undef ndim
55 
56