Home
last modified time | relevance | path

Searched refs:Vmm (Results 1 – 25 of 176) sorted by relevance

12345678

/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/
H A Djit_uni_resampling_kernel.hpp104 const Vmm vmm_tail_mask_ = Vmm(0);
109 const Vmm vmm_full_mask_ = Vmm(1);
110 const Vmm vmm_src_ = Vmm(2);
111 const Vmm vmm_weights_ = Vmm(3);
112 const Vmm vmm_indices_ = Vmm(4);
114 const Vmm vmm_sum_scale_ = Vmm(7);
115 const Vmm vmm_tmp_ = Vmm(8);
151 const Vmm weight_left_ = Vmm(1);
152 const Vmm weight_right_ = Vmm(2);
153 const Vmm weight_top_ = Vmm(3);
[all …]
H A Djit_uni_binary_kernel.hpp72 using Vmm = typename cpu_isa_traits<isa>::Vmm; typedef
104 const Vmm vmm_tail_vmask_ = Vmm(0);
105 const Vmm vreg_sum_scale_ = Vmm(is_avx512 ? 17 : 9);
107 const Vmm vreg_zero_ = Vmm(is_avx512 ? 18 : 10);
108 const Vmm vreg_one_ = Vmm(is_avx512 ? 19 : 11);
112 const Vmm vreg_scales_src0_ = Vmm(is_avx512 ? 22 : 14);
113 const Vmm vreg_scales_src1_ = Vmm(is_avx512 ? 23 : 15);
120 const Vmm vmm_full_mask_ = Vmm(is_avx512_not_mic ? 24 : 5);
121 const Vmm vmm_tmp_gather_ = Vmm(is_avx512_not_mic ? 25 : 6);
122 const Vmm vmm_indices_ = Vmm(is_avx512_not_mic ? 30 : 7);
[all …]
H A Djit_uni_x8s8s32x_conv_kernel.hpp95 const Vmm vmm_wei = Vmm(0);
97 const Vmm vmm_bias = Vmm(0);
98 const Vmm vmm_comp = Vmm(2); // only for signed input
99 const Vmm vmm_scale = Vmm(1);
101 const Vmm vmm_prev_dst = Vmm(0);
103 const Vmm vmm_zero = Vmm(0);
104 const Vmm vmm_saturation = Vmm(0);
106 const Vmm vmm_zp = Vmm(6);
107 const Vmm vmm_zp_one = Vmm(5);
112 const Vmm vmm_shift = Vmm(1); // only for signed input
[all …]
H A Djit_uni_batch_normalization_s8.cpp254 Vmm v = Vmm(0); in compute_dst()
255 Vmm vscale = Vmm(1); in compute_dst()
256 Vmm vshift = Vmm(2); in compute_dst()
257 Vmm vmean = Vmm(3); in compute_dst()
309 Vmm tail_vmask = Vmm(11);
310 Vmm body_vmask = Vmm(12);
363 Vmm v0 = Vmm(0); in compute_dst()
365 Vmm v1 = Vmm(1); in compute_dst()
368 Vmm vmean0 = Vmm(4); in compute_dst()
491 Vmm v0 = Vmm(0); in compute_dst()
[all …]
H A Djit_uni_reduction_kernel.hpp48 template <typename Vmm>
85 const Vmm vmm_tail_load_mask_ = Vmm(0);
86 const Vmm vmm_tail_store_mask_ = Vmm(1);
87 const Vmm vmm_zero_saturation_ = Vmm(2);
88 const Vmm vmm_saturation_ubound_ = Vmm(3);
89 const Vmm vmm_acc_ = Vmm(4);
90 const Vmm vmm_tmp1_ = Vmm(5);
91 const Vmm vmm_tmp2_ = Vmm(6);
92 const Vmm vmm_tmp3_ = Vmm(7);
93 const Vmm vmm_tmp4_ = Vmm(8);
[all …]
H A Djit_uni_x8s8s32x_1x1_conv_kernel.hpp75 const Vmm vmm_tmp = Vmm(3);
76 const Vmm vmm_one = Vmm(2);
77 const Vmm vmm_zero = Vmm(1);
78 const Vmm vmm_shift = Vmm(1);
79 const Vmm vmm_bcast = Vmm(0);
80 const Vmm vmm_saturation = Vmm(0);
82 const Vmm vmm_scale = Vmm(1);
84 const Vmm vmm_prev_dst = Vmm(1);
87 const Vmm vmm_bias = Vmm(3);
89 const Vmm vmm_zp = Vmm(1);
[all …]
H A Djit_avx512_core_x8s8s32x_conv_kernel.hpp32 template <typename Vmm>
101 const Vmm vmm_wei = Vmm(31);
103 const Vmm vmm_comp = Vmm(30); // only for signed input
104 const Vmm vmm_bias = Vmm(31);
106 const Vmm vmm_prev_dst = Vmm(31);
108 const Vmm vmm_saturation = Vmm(30);
109 const Vmm vmm_zero = Vmm(31);
114 const Vmm vmm_tmp = Vmm(28); // not used for depthwise
115 const Vmm vmm_one
118 const Vmm vmm_zp = Vmm(25);
[all …]
H A Djit_uni_batch_normalization.cpp530 Vmm b = Vmm(0); in mean_channels()
553 uni_vaddps(Vmm(ch_idx), Vmm(ch_idx), Vmm(sp_idx++)); in mean_variance_nspc()
572 uni_vfmadd231ps(Vmm(ch_idx), Vmm(30), Vmm(30)); in mean_variance_nspc()
801 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in compute_mean_variance()
861 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in compute_mean_variance()
862 uni_vpxor(Vmm(1), Vmm(1), Vmm(1)); in compute_mean_variance()
933 uni_vpxor(Vmm(1), Vmm(1), Vmm(1)); in compute_mean_variance()
1473 uni_vaddps(Vmm(idx), Vmm(idx), Vmm(idx + 1)); in backward_diff_channels_nspc_compute()
1566 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in backward()
1637 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in backward()
[all …]
H A Djit_uni_x8s8s32x_deconvolution.hpp40 template <cpu_isa_t isa, typename Vmm>
46 template <cpu_isa_t isa, typename Vmm>
106 const Vmm vmm_tmp_ = Vmm(3);
107 const Vmm vmm_one_ = Vmm(2);
109 const Vmm vmm_zero_ = Vmm(0);
111 const Vmm &vmm_wei_ = vmm_zero_;
114 const Vmm vmm_shift_ = Vmm(1);
115 const Vmm vmm_comp_ = Vmm(1);
116 const Vmm &vmm_bias_ = vmm_zero_;
121 Vmm vmm_bias_alpha() const;
[all …]
H A Djit_uni_dw_conv_kernel_f32.hpp83 inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); } in get_ker_reg()
84 inline Vmm get_src_reg(int idx) { return Vmm(idx + 1); } in get_src_reg()
90 inline Vmm get_acc_reg(int idx) { return Vmm(get_acc_reg_idx(idx)); } in get_acc_reg()
94 void add_tail_from_mem(Vmm &vmm_acc, Vmm &vmm_tmp, const Xbyak::Reg64 &reg,
142 inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); } in get_ker_reg()
143 inline Vmm get_ddst_reg(int idx) { return Vmm(idx + 1); } in get_ddst_reg()
144 inline Vmm get_acc_reg(int idx) { return Vmm(idx + 4); } in get_acc_reg()
217 inline Vmm get_bias_reg(int idx = 0) { return Vmm(idx); } in get_bias_reg()
222 return Vmm(vmm_idx); in get_output_reg()
236 inline Vmm get_aux_reg() { return Vmm(0); } in get_aux_reg()
[all …]
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/
H A Djit_uni_resampling_kernel.hpp104 const Vmm vmm_tail_mask_ = Vmm(0);
109 const Vmm vmm_full_mask_ = Vmm(1);
110 const Vmm vmm_src_ = Vmm(2);
111 const Vmm vmm_weights_ = Vmm(3);
112 const Vmm vmm_indices_ = Vmm(4);
114 const Vmm vmm_sum_scale_ = Vmm(7);
115 const Vmm vmm_tmp_ = Vmm(8);
151 const Vmm weight_left_ = Vmm(1);
152 const Vmm weight_right_ = Vmm(2);
153 const Vmm weight_top_ = Vmm(3);
[all …]
H A Djit_uni_binary_kernel.hpp72 using Vmm = typename cpu_isa_traits<isa>::Vmm; typedef
104 const Vmm vmm_tail_vmask_ = Vmm(0);
105 const Vmm vreg_sum_scale_ = Vmm(is_avx512 ? 17 : 9);
107 const Vmm vreg_zero_ = Vmm(is_avx512 ? 18 : 10);
108 const Vmm vreg_one_ = Vmm(is_avx512 ? 19 : 11);
112 const Vmm vreg_scales_src0_ = Vmm(is_avx512 ? 22 : 14);
113 const Vmm vreg_scales_src1_ = Vmm(is_avx512 ? 23 : 15);
120 const Vmm vmm_full_mask_ = Vmm(is_avx512_not_mic ? 24 : 5);
121 const Vmm vmm_tmp_gather_ = Vmm(is_avx512_not_mic ? 25 : 6);
122 const Vmm vmm_indices_ = Vmm(is_avx512_not_mic ? 30 : 7);
[all …]
H A Djit_uni_x8s8s32x_conv_kernel.hpp99 const Vmm vmm_wei = Vmm(0);
101 const Vmm vmm_bias = Vmm(0);
102 const Vmm vmm_comp = Vmm(2); // only for signed input
103 const Vmm vmm_scale = Vmm(1);
105 const Vmm vmm_prev_dst = Vmm(0);
107 const Vmm vmm_zero = Vmm(0);
108 const Vmm vmm_saturation = Vmm(0);
110 const Vmm vmm_zp = Vmm(6);
111 const Vmm vmm_zp_one = Vmm(5);
116 const Vmm vmm_shift = Vmm(1); // only for signed input
[all …]
H A Djit_uni_batch_normalization_s8.cpp252 Vmm v = Vmm(0); in compute_dst()
253 Vmm vscale = Vmm(1); in compute_dst()
254 Vmm vshift = Vmm(2); in compute_dst()
255 Vmm vmean = Vmm(3); in compute_dst()
307 Vmm tail_vmask = Vmm(11);
308 Vmm body_vmask = Vmm(12);
361 Vmm v0 = Vmm(0); in compute_dst()
363 Vmm v1 = Vmm(1); in compute_dst()
366 Vmm vmean0 = Vmm(4); in compute_dst()
489 Vmm v0 = Vmm(0); in compute_dst()
[all …]
H A Djit_uni_reduction_kernel.hpp48 template <typename Vmm>
85 const Vmm vmm_tail_load_mask_ = Vmm(0);
86 const Vmm vmm_tail_store_mask_ = Vmm(1);
87 const Vmm vmm_zero_saturation_ = Vmm(2);
88 const Vmm vmm_saturation_ubound_ = Vmm(3);
89 const Vmm vmm_acc_ = Vmm(4);
90 const Vmm vmm_tmp1_ = Vmm(5);
91 const Vmm vmm_tmp2_ = Vmm(6);
92 const Vmm vmm_tmp3_ = Vmm(7);
93 const Vmm vmm_tmp4_ = Vmm(8);
[all …]
H A Djit_uni_x8s8s32x_1x1_conv_kernel.hpp76 const Vmm vmm_tmp = Vmm(3);
77 const Vmm vmm_one = Vmm(2);
78 const Vmm vmm_zero = Vmm(1);
79 const Vmm vmm_shift = Vmm(1);
80 const Vmm vmm_bcast = Vmm(0);
81 const Vmm vmm_saturation = Vmm(0);
83 const Vmm vmm_scale = Vmm(1);
85 const Vmm vmm_prev_dst = Vmm(1);
88 const Vmm vmm_bias = Vmm(3);
90 const Vmm vmm_zp = Vmm(1);
[all …]
H A Djit_avx512_core_x8s8s32x_conv_kernel.hpp32 template <typename Vmm>
105 const Vmm vmm_wei = Vmm(31);
107 const Vmm vmm_comp = Vmm(30); // only for signed input
108 const Vmm vmm_bias = Vmm(31);
110 const Vmm vmm_prev_dst = Vmm(31);
112 const Vmm vmm_saturation = Vmm(30);
113 const Vmm vmm_sum_zp = Vmm(30);
114 const Vmm vmm_zero = Vmm(31);
119 const Vmm vmm_tmp = Vmm(28); // not used for depthwise
123 const Vmm vmm_zp = Vmm(25);
[all …]
H A Djit_uni_x8s8s32x_deconvolution.hpp41 template <cpu_isa_t isa, typename Vmm>
47 template <cpu_isa_t isa, typename Vmm>
107 const Vmm vmm_tmp_ = Vmm(3);
108 const Vmm vmm_one_ = Vmm(2);
110 const Vmm vmm_zero_ = Vmm(0);
112 const Vmm &vmm_wei_ = vmm_zero_;
115 const Vmm vmm_shift_ = Vmm(1);
116 const Vmm vmm_comp_ = Vmm(1);
117 const Vmm &vmm_bias_ = vmm_zero_;
123 Vmm vmm_bias_alpha() const;
[all …]
H A Djit_uni_batch_normalization.cpp529 Vmm b = Vmm(0); in mean_channels()
552 uni_vaddps(Vmm(ch_idx), Vmm(ch_idx), Vmm(sp_idx++)); in mean_variance_nspc()
571 uni_vfmadd231ps(Vmm(ch_idx), Vmm(30), Vmm(30)); in mean_variance_nspc()
800 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in compute_mean_variance()
860 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in compute_mean_variance()
861 uni_vpxor(Vmm(1), Vmm(1), Vmm(1)); in compute_mean_variance()
932 uni_vpxor(Vmm(1), Vmm(1), Vmm(1)); in compute_mean_variance()
1472 uni_vaddps(Vmm(idx), Vmm(idx), Vmm(idx + 1)); in backward_diff_channels_nspc_compute()
1565 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in backward()
1636 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in backward()
[all …]
H A Djit_avx512_core_x8s8s32x_1x1_conv_kernel.hpp32 template <typename Vmm>
80 const Vmm vmm_tmp = Vmm(28);
81 const Vmm vmm_saturation = Vmm(28);
82 const Vmm vmm_one = Vmm(29);
83 const Vmm vmm_zero = Vmm(30);
84 const Vmm vmm_prev_dst = Vmm(30);
85 const Vmm vmm_shift = Vmm(30);
86 const Vmm vmm_bcast = Vmm(31);
87 const Vmm vmm_bias_alpha = Vmm(31);
90 const Vmm vmm_zp = Vmm(30);
[all …]
H A Djit_uni_dw_conv_kernel_f32.hpp84 inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); } in get_ker_reg()
85 inline Vmm get_src_reg(int idx) { return Vmm(idx + 1); } in get_src_reg()
91 inline Vmm get_acc_reg(int idx) { return Vmm(get_acc_reg_idx(idx)); } in get_acc_reg()
95 void add_tail_from_mem(Vmm &vmm_acc, Vmm &vmm_tmp, const Xbyak::Reg64 &reg,
143 inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); } in get_ker_reg()
144 inline Vmm get_ddst_reg(int idx) { return Vmm(idx + 1); } in get_ddst_reg()
145 inline Vmm get_acc_reg(int idx) { return Vmm(idx + 4); } in get_acc_reg()
218 inline Vmm get_bias_reg(int idx = 0) { return Vmm(idx); } in get_bias_reg()
223 return Vmm(vmm_idx); in get_output_reg()
237 inline Vmm get_aux_reg() { return Vmm(0); } in get_aux_reg()
[all …]
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/injectors/
H A Djit_uni_eltwise_injector.hpp188 void vec_shift(const Vmm &vmm_dst, const Vmm &vmm_src, bool shift_left,
190 void compute_cmp_mask(const Vmm &vmm_src,
195 void exp_compute_vector_fwd(const Vmm &vmm_src);
198 void elu_compute_vector_fwd(const Vmm &vmm_src);
201 void abs_compute_vector_fwd(const Vmm &vmm_src);
211 void log_compute_vector_fwd(const Vmm &vmm_src);
213 void pow_compute_vector_fwd(const Vmm &vmm_src);
218 void exp_compute_vector_bwd(const Vmm &vmm_src);
220 void elu_compute_vector_bwd(const Vmm &vmm_src);
223 void abs_compute_vector_bwd(const Vmm &vmm_src);
[all …]
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/injectors/
H A Djit_uni_eltwise_injector.hpp74 using Vmm = typename cpu_isa_traits<isa>::Vmm; typedef
184 void vec_shift(const Vmm &vmm_dst, const Vmm &vmm_src, bool shift_left,
186 void compute_cmp_mask(const Vmm &vmm_src,
191 void exp_compute_vector_fwd(const Vmm &vmm_src);
194 void elu_compute_vector_fwd(const Vmm &vmm_src);
197 void abs_compute_vector_fwd(const Vmm &vmm_src);
207 void log_compute_vector_fwd(const Vmm &vmm_src);
209 void pow_compute_vector_fwd(const Vmm &vmm_src);
214 void exp_compute_vector_bwd(const Vmm &vmm_src);
216 void elu_compute_vector_bwd(const Vmm &vmm_src);
[all …]
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/utils/
H A Djit_io_helper.cpp67 template <typename Vmm>
118 template <typename Vmm>
121 template <typename Vmm>
160 void jit_io_helper_t<Vmm>::prepare_i8_data_to_store(const Vmm &i8_vmm) { in prepare_i8_data_to_store()
383 const Vmm vmm_mask = Vmm(gather_conf_->full_vmm_mask_idx_); in init_full_mask()
401 const Vmm &indices_vmm, const Vmm &dst_vmm, const bool tail) { in gather()
406 const Vmm &mask = tail ? Vmm(tail_conf_->tail_vmm_mask_idx_) in gather()
538 void jit_io_helper_t<Vmm>::store(const Vmm &src_raw_vmm, in store()
578 void jit_io_helper_t<Vmm>::saturate(const Vmm &vmm) { in saturate()
587 void jit_io_helper_t<Vmm>::store_byte_by_byte(const Vmm &src_vmm, in store_byte_by_byte()
[all …]
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/utils/
H A Djit_io_helper.cpp67 template <typename Vmm>
118 template <typename Vmm>
121 template <typename Vmm>
160 void jit_io_helper_t<Vmm>::prepare_i8_data_to_store(const Vmm &i8_vmm) { in prepare_i8_data_to_store()
383 const Vmm vmm_mask = Vmm(gather_conf_->full_vmm_mask_idx_); in init_full_mask()
401 const Vmm &indices_vmm, const Vmm &dst_vmm, const bool tail) { in gather()
406 const Vmm &mask = tail ? Vmm(tail_conf_->tail_vmm_mask_idx_) in gather()
538 void jit_io_helper_t<Vmm>::store(const Vmm &src_raw_vmm, in store()
578 void jit_io_helper_t<Vmm>::saturate(const Vmm &vmm) { in saturate()
587 void jit_io_helper_t<Vmm>::store_byte_by_byte(const Vmm &src_vmm, in store_byte_by_byte()
[all …]

12345678