Home
last modified time | relevance | path

Searched refs:B_ptrs (Results 1 – 13 of 13) sorted by relevance

/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/gpu/ocl/gemm/
H A Dgen9_gemm_nocopy_f16.cl227 global half *B_ptrs[2] = {B, B + 16 * ldb};
250 b[j] = VLOAD4_ALIGNED(0, (B_ptrs[j] + h));
264 b[j] = B_ptrs[j][h];
294 if (jrem > j * 16) b[j] = B_ptrs[j][h];
414 (global ushort *)(B_ptrs[0] + h * ldb)));
438 if (jrem > 0) b[0].s0 = B_ptrs[0][h * ldb + lid];
524 global half *B_ptrs[2] = {B, B + 16 * ldb};
542 b[z] = VLOAD4_ALIGNED(0, (B_ptrs[z] + h));
554 b[z] = B_ptrs[z][h];
577 if (jrem > z * 16) b[z] = B_ptrs[z][h];
[all …]
H A Dgen9_gemm_nocopy_f32.cl574 global float *B_ptrs[2] = {B, B + 16 * ldb};
592 if (jrem > hh * 16) b[hh] = vload4(0, B_ptrs[hh]);
593 B_ptrs[hh] += 4;
608 if (jrem > hh * 16) b[hh] = *B_ptrs[hh];
609 B_ptrs[hh]++;
/dports/math/onednn/oneDNN-2.5.1/src/gpu/ocl/gemm/
H A Dgen9_gemm_nocopy_f16.cl227 global half *B_ptrs[2] = {B, B + 16 * ldb};
250 b[j] = VLOAD4_ALIGNED(0, (B_ptrs[j] + h));
264 b[j] = B_ptrs[j][h];
294 if (jrem > j * 16) b[j] = B_ptrs[j][h];
414 (global ushort *)(B_ptrs[0] + h * ldb)));
438 if (jrem > 0) b[0].s0 = B_ptrs[0][h * ldb + lid];
524 global half *B_ptrs[2] = {B, B + 16 * ldb};
542 b[z] = VLOAD4_ALIGNED(0, (B_ptrs[z] + h));
554 b[z] = B_ptrs[z][h];
577 if (jrem > z * 16) b[z] = B_ptrs[z][h];
[all …]
H A Dgen9_gemm_nocopy_f32.cl574 global float *B_ptrs[2] = {B, B + 16 * ldb};
592 if (jrem > hh * 16) b[hh] = vload4(0, B_ptrs[hh]);
593 B_ptrs[hh] += 4;
608 if (jrem > hh * 16) b[hh] = *B_ptrs[hh];
609 B_ptrs[hh]++;
/dports/math/libxsmm/libxsmm-1.16.3/src/template/
H A Dlibxsmm_dnn_convolve_st_fwd_custom_custom_generic_bf16.tpl.c25 const element_input_type *B_ptrs[1024]; variable
167 br_gemm_kernel2(A_ptrs, B_ptrs, out_ptr, &n_blocks);
191 br_gemm_kernel2(A_ptrs, B_ptrs, out_ptr, &n_blocks);
213 br_gemm_kernel(A_ptrs, B_ptrs, out_ptr, &n_blocks);
269B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ifm2, ij_use + kj, ii_use + ki, 0, handle->bloc…
280 br_gemm_kernel(A_ptrs, B_ptrs, out_ptr, &n_blocks);
354 br_gemm_kernel2(A_ptrs, B_ptrs, out_ptr, &n_blocks);
376 br_gemm_kernel2(A_ptrs, B_ptrs, out_ptr, &n_blocks);
398 br_gemm_kernel(A_ptrs, B_ptrs, out_ptr, &n_blocks);
473 br_gemm_kernel(A_ptrs, B_ptrs, out_ptr, &n_blocks);
[all …]
H A Dlibxsmm_dnn_convolve_st_fwd_custom_custom_generic.tpl.c24 const element_input_type *B_ptrs[1024]; variable
187B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ifm2, ij_use + kj, ii_use + ki + 1, 0, handle->…
196B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ifm2, ij_use + kj, ii_use + ki, 0, handle->bloc…
205B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ifm2, ij_use + kj, ii_use + ki, 0, handle->bloc…
259B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ifm2, ij_use + kj, ii_use + ki, 0, handle->bloc…
265 …br_gemm_kernel(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, output, img, ofm1, oj_use, oi_use, 0, handle…
322B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ifm2, ij_use + kj, ii_use + ki + 1, 0, handle->…
331B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ifm2, ij_use + kj, ii_use + ki, 0, handle->bloc…
340B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ifm2, ij_use + kj, ii_use + ki, 0, handle->bloc…
397B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ifm2, ij_use + kj, ii_use + ki, 0, handle->bloc…
[all …]
H A Dlibxsmm_dnn_convolve_st_fwd_nhwc_custom-rsck_generic.tpl.c24 const element_input_type *B_ptrs[1024]; variable
198B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ij_use + kj, ii_use + ki + 1, ifm2, 0, IFH, IFW…
212B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ij_use + kj, ii_use + ki, ifm2, 0, IFH, IFW, ha…
226B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ij_use + kj, ii_use + ki, ifm2, 0, IFH, IFW, ha…
285B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ij_use + kj, ii_use + ki, ifm2, 0, IFH, IFW, ha…
291 …br_gemm_kernel(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, output, img, oj_use, oi_use, ofm1, 0, handle…
353B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ij_use + kj, ii_use + ki + 1, ifm2, 0, IFH, IFW…
367B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ij_use + kj, ii_use + ki, ifm2, 0, IFH, IFW, ha…
381B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ij_use + kj, ii_use + ki, ifm2, 0, IFH, IFW, ha…
443B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img, ij_use + kj, ii_use + ki, ifm2, 0, IFH, IFW, ha…
[all …]
H A Dlibxsmm_dnn_convolve_st_bwd_custom_custom_generic_bf16.tpl.c27 const element_input_type *B_ptrs[1024]; variable
159B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki + 1, 0, handle-…
167 br_gemm_kernel2(A_ptrs, B_ptrs, del_inp_ptr, &n_blocks);
183B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki, 0, handle->blo…
191 br_gemm_kernel2(A_ptrs, B_ptrs, del_inp_ptr, &n_blocks);
207B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki, 0, handle->blo…
215 br_gemm_kernel(A_ptrs, B_ptrs, del_inp_ptr, &n_blocks);
274B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki, 0, handle->blo…
284 br_gemm_kernel(A_ptrs, B_ptrs, del_inp_ptr, &n_blocks);
338B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki, 0, handle->blo…
[all …]
H A Dlibxsmm_dnn_convolve_st_upd_nhwc_custom-rsck_generic.tpl.c47 const element_input_type *B_ptrs[1024]; variable
281B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0,…
287 …br_gemm_kernel(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(6, weight_global, ofm1, ifm1, kj, ki, 0, 0, han…
297B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii + 1, ifm1…
303 …br_gemm_kernel2(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(6, weight_global, ofm1, ifm1, kj, ki, 0, 0, ha…
313B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0,…
330B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0,…
346B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0,…
502B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input_use, img, ij + j_br * handle->desc.u, ii, ifm1, 0, IFHP…
554B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ij + j_br * handle->desc.u, ii, ifm1, 0,…
[all …]
H A Dlibxsmm_dnn_convolve_st_bwd_custom_custom_generic.tpl.c23 const element_input_type *B_ptrs[1024]; variable
174B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki + 1, 0, handle-…
178 …br_gemm_kernel2(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, del_input, img, ifm1, ij_use, ii_use + 1, 0…
183B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki, 0, handle->blo…
187 …br_gemm_kernel2(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, del_input, img, ifm1, ij_use, ii_use, 0, ha…
192B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki, 0, handle->blo…
196 …br_gemm_kernel(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, del_input, img, ifm1, ij_use, ii_use, 0, han…
242B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki, 0, handle->blo…
248 …br_gemm_kernel(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, del_input, img, ifm1, ij_use, ii_use, 0, han…
292B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, ofm2, oj_use + kj, oi_use + ki, 0, handle->blo…
[all …]
H A Dlibxsmm_dnn_convolve_st_bwd_nhwc_custom-rsck_generic.tpl.c23 const element_input_type *B_ptrs[1024]; variable
189B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, oj_use + kj, oi_use + ki + 1, ofm2, 0, handle-…
193 …br_gemm_kernel2(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, del_input, img, ij_use, ii_use + 1, ifm1, 0…
198B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, oj_use + kj, oi_use + ki, ofm2, 0, handle->ofh…
202 …br_gemm_kernel2(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, del_input, img, ij_use, ii_use, ifm1, 0, IF…
207B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, oj_use + kj, oi_use + ki, ofm2, 0, handle->ofh…
211 …br_gemm_kernel(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, del_input, img, ij_use, ii_use, ifm1, 0, IFH…
257B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, oj_use + kj, oi_use + ki, ofm2, 0, handle->ofh…
263 …br_gemm_kernel(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(5, del_input, img, ij_use, ii_use, ifm1, 0, IFH…
307B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, output, img, oj_use + kj, oi_use + ki, ofm2, 0, handle->ofh…
[all …]
H A Dlibxsmm_dnn_convolve_st_upd_custom_custom_generic.tpl.c32 const element_input_type *B_ptrs[1024]; variable
242B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ifm1, ij + j_br * handle->desc.u, ii, 0,…
247 …br_gemm_kernel(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(6, weight_global, ofm1, ifm1, kj, ki, 0, 0, han…
253B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ifm1, ij + j_br * handle->desc.u, ii + 1…
258 …br_gemm_kernel2(A_ptrs, B_ptrs, &LIBXSMM_VLA_ACCESS(6, weight_global, ofm1, ifm1, kj, ki, 0, 0, ha…
264B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ifm1, ij + j_br * handle->desc.u, ii, 0,…
276B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ifm1, ij + j_br * handle->desc.u, ii, 0,…
287B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ifm1, ij + j_br * handle->desc.u, ii, 0,…
424B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input_use, img, ifm1, ij + j_br * handle->desc.u, ii, 0, hand…
466B_ptrs[ind] = &LIBXSMM_VLA_ACCESS(5, input, img + img_br, ifm1, ij + j_br * handle->desc.u, ii, 0,…
[all …]
H A Dlibxsmm_dnn_convolve_st_upd_custom_custom_generic_bf16.tpl.c128 const element_input_type *B_ptrs[1024]; variable
298B_ptrs[j_br] = (element_input_type*) &LIBXSMM_VLA_ACCESS(5, tr_input_2, img, 0, 0, j_br, 0, handle…
301 br_gemm_kernel(A_ptrs, B_ptrs, dst_ptr, &n_blocks);
416B_ptrs[j_br] = (element_input_type*) &LIBXSMM_VLA_ACCESS(5, tr_input_2, img, 0, 0, j_br, 0, handle…
419 br_gemm_kernel(A_ptrs, B_ptrs, dst_ptr, &n_blocks);
526B_ptrs[img_br] = &LIBXSMM_VLA_ACCESS(4, tr_input, img + img_br, ifm1, 0, pix + kj * handle->ifwp +…
529 br_gemm_kernel(A_ptrs, B_ptrs, dst_ptr, &n_blocks);