Lines Matching refs:base_n

30   const Index base_n = 64 * n_block_idx;  in EigenContractionKernelInternal()  local
179 const Index rhs_horiz_0 = base_n + threadIdx.z + 0 * 8; \ in EigenContractionKernelInternal()
180 const Index rhs_horiz_1 = base_n + threadIdx.z + 1 * 8; \ in EigenContractionKernelInternal()
181 const Index rhs_horiz_2 = base_n + threadIdx.z + 2 * 8; \ in EigenContractionKernelInternal()
182 const Index rhs_horiz_3 = base_n + threadIdx.z + 3 * 8; \ in EigenContractionKernelInternal()
183 const Index rhs_horiz_4 = base_n + threadIdx.z + 4 * 8; \ in EigenContractionKernelInternal()
184 const Index rhs_horiz_5 = base_n + threadIdx.z + 5 * 8; \ in EigenContractionKernelInternal()
185 const Index rhs_horiz_6 = base_n + threadIdx.z + 6 * 8; \ in EigenContractionKernelInternal()
186 const Index rhs_horiz_7 = base_n + threadIdx.z + 7 * 8; \ in EigenContractionKernelInternal()
465 const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8); in EigenContractionKernelInternal()
479 output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 0) = val0; in EigenContractionKernelInternal()
480 output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 1) = val1; in EigenContractionKernelInternal()
481 output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 2) = val2; in EigenContractionKernelInternal()
482 output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 3) = val3; in EigenContractionKernelInternal()
483 output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 4) = val4; in EigenContractionKernelInternal()
484 output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 5) = val5; in EigenContractionKernelInternal()
485 output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 6) = val6; in EigenContractionKernelInternal()
486 output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 7) = val7; in EigenContractionKernelInternal()
491 output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * j) = val; in EigenContractionKernelInternal()
513 const Index base_n = 64 * n_block_idx; in EigenContractionKernel() local
515 if (base_m + 63 < m_size && base_n + 63 < n_size) { in EigenContractionKernel()
531 const Index base_m, const Index base_n) { in EigenFloatContractionKernelInternal16x16() argument
576 Index rhs_horiz0 = (threadIdx.x>>2)+threadIdx.y*4+base_n; in EigenFloatContractionKernelInternal16x16()
691 Index horiz_base = threadIdx.y*4+base_n; in EigenFloatContractionKernelInternal16x16()
768 const Index base_m, const Index base_n) { in EigenFloatContractionKernelInternal() argument
907 Index rhs_horiz0 = threadIdx.y*2+base_n; in EigenFloatContractionKernelInternal()
908 Index rhs_horiz1 = threadIdx.y*2+1+base_n; in EigenFloatContractionKernelInternal()
1067 Index horiz_base = (threadIdx.y/4)*8+base_n; in EigenFloatContractionKernelInternal()
1147 const Index base_n = 64 * n_block_idx; in EigenFloatContractionKernel() local
1149 bool check_rhs = (base_n + 63) >= n_size; in EigenFloatContractionKernel()
1156 …utput, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); in EigenFloatContractionKernel()
1159 …utput, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); in EigenFloatContractionKernel()
1165 …utput, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); in EigenFloatContractionKernel()
1168 …utput, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); in EigenFloatContractionKernel()
1187 const Index base_n = 64 * n_block_idx; in EigenFloatContractionKernel16x16() local
1190 if (base_n + 63 < n_size) { in EigenFloatContractionKernel16x16()
1191 …per, false, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); in EigenFloatContractionKernel16x16()
1193 …pper, false, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); in EigenFloatContractionKernel16x16()
1196 if (base_n + 63 < n_size) { in EigenFloatContractionKernel16x16()
1197 …pper, true, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); in EigenFloatContractionKernel16x16()
1199 …apper, true, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); in EigenFloatContractionKernel16x16()