Home
last modified time | relevance | path

Searched refs:input_add_ptr (Results 1 – 9 of 9) sorted by relevance

/dports/math/libxsmm/libxsmm-1.16.3/src/template/
H A Dlibxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c64_avx512.tpl.c219 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable
243 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) );
262 lcl_vo2 = _mm512_add_ps( lcl_vo2, _mm512_load_act( input_add_ptr+16 ) );
281 lcl_vo3 = _mm512_add_ps( lcl_vo3, _mm512_load_act( input_add_ptr+32 ) );
300 lcl_vo4 = _mm512_add_ps( lcl_vo4, _mm512_load_act( input_add_ptr+48 ) );
320 input_add_ptr += sw*64;
H A Dlibxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c64_avx512.tpl.c234 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable
258 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) );
277 lcl_vo2 = _mm512_add_ps( lcl_vo2, _mm512_load_act( input_add_ptr+16 ) );
296 lcl_vo3 = _mm512_add_ps( lcl_vo3, _mm512_load_act( input_add_ptr+32 ) );
315 lcl_vo4 = _mm512_add_ps( lcl_vo4, _mm512_load_act( input_add_ptr+48 ) );
335 input_add_ptr += sw*64;
H A Dlibxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c32_avx512.tpl.c206 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable
226 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) );
245 lcl_vo2 = _mm512_add_ps( lcl_vo2, _mm512_load_act( input_add_ptr+16 ) );
263 input_add_ptr += sw*32;
H A Dlibxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c32_avx512.tpl.c224 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable
244 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) );
263 lcl_vo2 = _mm512_add_ps( lcl_vo2, _mm512_load_act( input_add_ptr+16 ) );
281 input_add_ptr += sw*32;
H A Dlibxsmm_dnn_fusedgroupnorm_st_fwd_custom_generic.tpl.c175 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, wi, 0, n… variable
202 input_add_f32.i[1] = input_add_ptr[v];
205 o += input_add_ptr[v];
H A Dlibxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c16_avx512.tpl.c185 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable
203 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) );
219 input_add_ptr += sw*16;
H A Dlibxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c16_avx512.tpl.c201 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable
219 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) );
235 input_add_ptr += sw*16;
H A Dlibxsmm_dnn_fusedbatchnorm_st_fwd_custom_generic.tpl.c212 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, wi, 0, n… variable
238 input_add_f32.i[1] = input_add_ptr[v];
241 o += input_add_ptr[v];
/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/common/
H A Ddnn_common.h2130 …ive_fusedbatchnorm_t* param, const float* input_ptr, float* output_ptr, const float* input_add_ptr, in naive_fusedbatchnorm_fp() argument
2148 LIBXSMM_VLA_DECL(4, const float, input_add, input_add_ptr, nFm, ifh, ifw); in naive_fusedbatchnorm_fp()
2291 …ive_fusedgroupnorm_t* param, const float* input_ptr, float* output_ptr, const float* input_add_ptr, in naive_fusedgroupnorm_fp() argument
2311 LIBXSMM_VLA_DECL(5, const float, input_add, input_add_ptr, nG, nFMG, ifh, ifw); in naive_fusedgroupnorm_fp()