/dports/math/libxsmm/libxsmm-1.16.3/src/template/ |
H A D | libxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c64_avx512.tpl.c | 219 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable 243 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) ); 262 lcl_vo2 = _mm512_add_ps( lcl_vo2, _mm512_load_act( input_add_ptr+16 ) ); 281 lcl_vo3 = _mm512_add_ps( lcl_vo3, _mm512_load_act( input_add_ptr+32 ) ); 300 lcl_vo4 = _mm512_add_ps( lcl_vo4, _mm512_load_act( input_add_ptr+48 ) ); 320 input_add_ptr += sw*64;
|
H A D | libxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c64_avx512.tpl.c | 234 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable 258 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) ); 277 lcl_vo2 = _mm512_add_ps( lcl_vo2, _mm512_load_act( input_add_ptr+16 ) ); 296 lcl_vo3 = _mm512_add_ps( lcl_vo3, _mm512_load_act( input_add_ptr+32 ) ); 315 lcl_vo4 = _mm512_add_ps( lcl_vo4, _mm512_load_act( input_add_ptr+48 ) ); 335 input_add_ptr += sw*64;
|
H A D | libxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c32_avx512.tpl.c | 206 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable 226 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) ); 245 lcl_vo2 = _mm512_add_ps( lcl_vo2, _mm512_load_act( input_add_ptr+16 ) ); 263 input_add_ptr += sw*32;
|
H A D | libxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c32_avx512.tpl.c | 224 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable 244 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) ); 263 lcl_vo2 = _mm512_add_ps( lcl_vo2, _mm512_load_act( input_add_ptr+16 ) ); 281 input_add_ptr += sw*32;
|
H A D | libxsmm_dnn_fusedgroupnorm_st_fwd_custom_generic.tpl.c | 175 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, wi, 0, n… variable 202 input_add_f32.i[1] = input_add_ptr[v]; 205 o += input_add_ptr[v];
|
H A D | libxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c16_avx512.tpl.c | 185 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable 203 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) ); 219 input_add_ptr += sw*16;
|
H A D | libxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c16_avx512.tpl.c | 201 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, ipw, 0, … variable 219 lcl_vo = _mm512_add_ps( lcl_vo, _mm512_load_act( input_add_ptr ) ); 235 input_add_ptr += sw*16;
|
H A D | libxsmm_dnn_fusedbatchnorm_st_fwd_custom_generic.tpl.c | 212 …const element_input_type* input_add_ptr = &LIBXSMM_VLA_ACCESS(5, input_add, img, fm, hi, wi, 0, n… variable 238 input_add_f32.i[1] = input_add_ptr[v]; 241 o += input_add_ptr[v];
|
/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/common/ |
H A D | dnn_common.h | 2130 …ive_fusedbatchnorm_t* param, const float* input_ptr, float* output_ptr, const float* input_add_ptr, in naive_fusedbatchnorm_fp() argument 2148 LIBXSMM_VLA_DECL(4, const float, input_add, input_add_ptr, nFm, ifh, ifw); in naive_fusedbatchnorm_fp() 2291 …ive_fusedgroupnorm_t* param, const float* input_ptr, float* output_ptr, const float* input_add_ptr, in naive_fusedgroupnorm_fp() argument 2311 LIBXSMM_VLA_DECL(5, const float, input_add, input_add_ptr, nG, nFMG, ifh, ifw); in naive_fusedgroupnorm_fp()
|