Home
last modified time | relevance | path

Searched refs:vfp32 (Results 1 – 14 of 14) sorted by relevance

/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/gxm/include/
H A Dcommon.hpp61 static __m512 gxm_fp32_to_bfp16_rne_adjustment_avx512f(__m512 vfp32) { in gxm_fp32_to_bfp16_rne_adjustment_avx512f() argument
66 __m512i vfp32_as_int = _mm512_castps_si512(vfp32); in gxm_fp32_to_bfp16_rne_adjustment_avx512f()
70 return _mm512_fixupimm_ps(_mm512_castsi512_ps(vfp32_as_int), vfp32, selector, 0); in gxm_fp32_to_bfp16_rne_adjustment_avx512f()
73 static __m256i gxm_fp32_to_bfp16_truncate_avx512f(__m512 vfp32) { in gxm_fp32_to_bfp16_truncate_avx512f() argument
74 __m512i vbfp16_32 = _mm512_srai_epi32(_mm512_castps_si512(vfp32), 16); in gxm_fp32_to_bfp16_truncate_avx512f()
/dports/math/libxsmm/libxsmm-1.16.3/samples/deeplearning/gxm/src/
H A Dreduce_weight_grads_bf16.c21 __m512 vfp32 = _mm512_add_ps(vfp32_l, vfp32_r);
22 __m512 vfp32rne = gxm_fp32_to_bfp16_rne_adjustment_avx512f(vfp32);
36 __m512 vfp32 = _mm512_add_ps(vfp32_l, vfp32_r);
37 __m512 vfp32rne = gxm_fp32_to_bfp16_rne_adjustment_avx512f(vfp32);
H A DFullyConnected.cpp452 __m512 vfp32 = gxm_fp32_to_bfp16_rne_adjustment_avx512f(_mm512_loadu_ps(in + i)); in convert_f32_bf16() local
453 __m256i vbfp16 = gxm_fp32_to_bfp16_truncate_avx512f(vfp32); in convert_f32_bf16()
479 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
480 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()
491 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
492 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()
H A DFCXSMM.cpp410 __m512 vfp32 = _mm512_add_ps(vfp32_l, vfp32_r); in weightUpdate() local
411 __m512 vfp32rne = gxm_fp32_to_bfp16_rne_adjustment_avx512f(vfp32); in weightUpdate()
425 __m512 vfp32 = _mm512_add_ps(vfp32_l, vfp32_r); in weightUpdate() local
426 __m512 vfp32rne = gxm_fp32_to_bfp16_rne_adjustment_avx512f(vfp32); in weightUpdate()
H A DDummyData.cpp120 __m512 vfp32 = gxm_fp32_to_bfp16_rne_adjustment_avx512f( _mm512_loadu_ps( in+i ) ); in convert_f32_bf16() local
121 __m256i vbfp16 = gxm_fp32_to_bfp16_truncate_avx512f( vfp32 ); in convert_f32_bf16()
H A DConv.cpp528 __m512 vfp32 = gxm_fp32_to_bfp16_rne_adjustment_avx512f( _mm512_loadu_ps( in+i ) ); in convert_f32_bf16() local
529 __m256i vbfp16 = gxm_fp32_to_bfp16_truncate_avx512f( vfp32 ); in convert_f32_bf16()
555 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
556 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()
567 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
568 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()
H A DSplit.cpp145 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
146 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()
H A DPooling.cpp205 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
206 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()
H A DSolver.cpp65 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
66 _mm512_storeu_ps( outp+i, vfp32 ); in convert_bf16_f32()
80 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
81 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()
H A DConvXSMM.cpp777 __m512 vfp32 = _mm512_add_ps(vfp32_l, vfp32_r); in weightUpdate() local
778 __m512 vfp32rne = gxm_fp32_to_bfp16_rne_adjustment_avx512f(vfp32); in weightUpdate()
792 __m512 vfp32 = _mm512_add_ps(vfp32_l, vfp32_r); in weightUpdate() local
793 __m512 vfp32rne = gxm_fp32_to_bfp16_rne_adjustment_avx512f(vfp32); in weightUpdate()
H A DEngine.cpp888 __m512 vfp32 = gxm_fp32_to_bfp16_rne_adjustment_avx512f( _mm512_loadu_ps( in+i ) ); in convert_f32_bf16() local
889 __m256i vbfp16 = gxm_fp32_to_bfp16_truncate_avx512f( vfp32 ); in convert_f32_bf16()
915 __m512 vfp32 = gxm_fp32_to_bfp16_rne_adjustment_avx512f(_mm512_loadu_ps(inp + i)); in convert_f32_bf16() local
916 __m256i vbfp16 = gxm_fp32_to_bfp16_truncate_avx512f(vfp32); in convert_f32_bf16()
930 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
931 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()
H A DFusedConvBN.cpp580 __m512 vfp32 = gxm_fp32_to_bfp16_rne_adjustment_avx512f(_mm512_loadu_ps(in + i)); in convert_f32_bf16() local
581 __m256i vbfp16 = gxm_fp32_to_bfp16_truncate_avx512f(vfp32); in convert_f32_bf16()
595 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
596 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()
H A DJitterData.cpp521 __m512 vfp32 = gxm_fp32_to_bfp16_rne_adjustment_avx512f(_mm512_loadu_ps(in + i)); in convert_f32_bf16() local
522 __m256i vbfp16 = gxm_fp32_to_bfp16_truncate_avx512f(vfp32); in convert_f32_bf16()
H A DFusedBNorm.cpp314 __m512 vfp32 = gxm_bfp16_to_fp32_avx512f( vbfp16 ); in convert_bf16_f32() local
315 _mm512_storeu_ps( out+i, vfp32 ); in convert_bf16_f32()