Home
last modified time | relevance | path

Searched refs:del_beta_img_ptr (Results 1 – 8 of 8) sorted by relevance

/dports/math/libxsmm/libxsmm-1.16.3/src/template/
H A Dlibxsmm_dnn_fusedbatchnorm_st_bwd_custom_f32_bf16_c32_avx512.tpl.c112 element_stats_type* del_beta_img_ptr; variable
117 del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, 32);
186 _mm512_storeu_ps( del_beta_img_ptr, lcl_vdbeta );
188 _mm512_storeu_ps( del_beta_img_ptr+16, lcl_vdbeta2 );
206 lcl_vdbeta = _mm512_add_ps( lcl_vdbeta, _mm512_loadu_ps( del_beta_img_ptr ) );
208 lcl_vdbeta2 = _mm512_add_ps( lcl_vdbeta2, _mm512_loadu_ps( del_beta_img_ptr+16 ) );
210 del_beta_img_ptr += 32;
230 lcl_vdbeta = _mm512_add_ps( lcl_vdbeta, _mm512_loadu_ps( del_beta_img_ptr ) );
234 del_beta_img_ptr += 32;
238 _mm512_storeu_ps( del_beta_img_ptr - (32*nImg), lcl_vdbeta );
[all …]
H A Dlibxsmm_dnn_fusedbatchnorm_st_bwd_custom_f32_bf16_c16_avx512.tpl.c109 element_stats_type* del_beta_img_ptr; variable
114 del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, 16);
158 _mm512_storeu_ps( del_beta_img_ptr, lcl_vdbeta );
168 … element_stats_type* del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, 0, 0, nImg, 16); variable
174 lcl_vdbeta = _mm512_add_ps( lcl_vdbeta, _mm512_loadu_ps( del_beta_img_ptr ) );
176 del_beta_img_ptr += 16;
186 … element_stats_type* del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, 0, 0, nImg, 16); variable
192 lcl_vdbeta = _mm512_add_ps( lcl_vdbeta, _mm512_loadu_ps( del_beta_img_ptr ) );
194 del_beta_img_ptr += 16;
198 _mm512_storeu_ps( del_beta_img_ptr - (nImg*16), lcl_vdbeta );
H A Dlibxsmm_dnn_fusedbatchnorm_st_bwd_custom_f32_bf16_c64_avx512.tpl.c118 element_stats_type* del_beta_img_ptr; variable
123 del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, 64);
232 _mm512_storeu_ps( del_beta_img_ptr, lcl_vdbeta );
234 _mm512_storeu_ps( del_beta_img_ptr+16, lcl_vdbeta2 );
236 _mm512_storeu_ps( del_beta_img_ptr+32, lcl_vdbeta3 );
238 _mm512_storeu_ps( del_beta_img_ptr+48, lcl_vdbeta4 );
254 lcl_vdbeta = _mm512_add_ps( lcl_vdbeta, _mm512_loadu_ps( del_beta_img_ptr ) );
256 del_beta_img_ptr += 64;
272 lcl_vdbeta = _mm512_add_ps( lcl_vdbeta, _mm512_loadu_ps( del_beta_img_ptr ) );
274 del_beta_img_ptr += 64;
[all …]
H A Dlibxsmm_dnn_fusedgroupnorm_st_bwd_custom_f32_bf16_c32_avx512.tpl.c109 element_stats_type* del_beta_img_ptr; variable
114 del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, 32);
185 _mm512_storeu_ps( del_beta_img_ptr, lcl_vdbeta );
187 _mm512_storeu_ps( del_beta_img_ptr+16, lcl_vdbeta2 );
195 … element_stats_type* del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, 0, 0, nImg, 32); variable
203 lcl_vdbeta = _mm512_add_ps( lcl_vdbeta, _mm512_loadu_ps( del_beta_img_ptr ) );
205 lcl_vdbeta2 = _mm512_add_ps( lcl_vdbeta2, _mm512_loadu_ps( del_beta_img_ptr+16 ) );
207 del_beta_img_ptr += 32;
H A Dlibxsmm_dnn_fusedbatchnorm_st_bwd_custom_generic.tpl.c114 element_stats_type* del_beta_img_ptr; variable
119 del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, nFmBlock);
184 del_beta_img_ptr[v] = lcl_beta_ptr[v];
205 …element_stats_type* del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, nFmBl… variable
210 del_beta_ptr[v] += del_beta_img_ptr[v];
222 …element_stats_type* del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, nFmBl… variable
227 del_beta_ptr[v] += del_beta_img_ptr[v];
H A Dlibxsmm_dnn_fusedgroupnorm_st_bwd_custom_f32_bf16_c64_avx512.tpl.c115 element_stats_type* del_beta_img_ptr; variable
120 del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, 64);
231 _mm512_storeu_ps( del_beta_img_ptr, lcl_vdbeta );
233 _mm512_storeu_ps( del_beta_img_ptr+16, lcl_vdbeta2 );
235 _mm512_storeu_ps( del_beta_img_ptr+32, lcl_vdbeta3 );
237 _mm512_storeu_ps( del_beta_img_ptr+48, lcl_vdbeta4 );
245 …element_stats_type* del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, (fm/4), 0, ((fm%4)*16),… variable
251 lcl_vdbeta = _mm512_add_ps( lcl_vdbeta, _mm512_loadu_ps( del_beta_img_ptr ) );
253 del_beta_img_ptr += 64;
H A Dlibxsmm_dnn_fusedgroupnorm_st_bwd_custom_f32_bf16_c16_avx512.tpl.c106 element_stats_type* del_beta_img_ptr; variable
111 del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, 16);
154 _mm512_storeu_ps( del_beta_img_ptr, lcl_vdbeta );
162 … element_stats_type* del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, 0, 0, nImg, 16); variable
168 lcl_vdbeta = _mm512_add_ps( lcl_vdbeta, _mm512_loadu_ps( del_beta_img_ptr ) );
170 del_beta_img_ptr += 16;
H A Dlibxsmm_dnn_fusedgroupnorm_st_bwd_custom_generic.tpl.c130 element_stats_type* del_beta_img_ptr; variable
133 del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, nFmBlock);
201 del_beta_img_ptr[v] = lcl_beta_ptr[v];
254 …element_stats_type* del_beta_img_ptr = &LIBXSMM_VLA_ACCESS(3, dbeta_img, fm, img, 0, nImg, nFmBl… variable
259 del_beta_ptr[v] += del_beta_img_ptr[v];