/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/ |
H A D | jit_uni_resampling_kernel.hpp | 104 const Vmm vmm_tail_mask_ = Vmm(0); 109 const Vmm vmm_full_mask_ = Vmm(1); 110 const Vmm vmm_src_ = Vmm(2); 111 const Vmm vmm_weights_ = Vmm(3); 112 const Vmm vmm_indices_ = Vmm(4); 114 const Vmm vmm_sum_scale_ = Vmm(7); 115 const Vmm vmm_tmp_ = Vmm(8); 151 const Vmm weight_left_ = Vmm(1); 152 const Vmm weight_right_ = Vmm(2); 153 const Vmm weight_top_ = Vmm(3); [all …]
|
H A D | jit_uni_binary_kernel.hpp | 72 using Vmm = typename cpu_isa_traits<isa>::Vmm; typedef 104 const Vmm vmm_tail_vmask_ = Vmm(0); 105 const Vmm vreg_sum_scale_ = Vmm(is_avx512 ? 17 : 9); 107 const Vmm vreg_zero_ = Vmm(is_avx512 ? 18 : 10); 108 const Vmm vreg_one_ = Vmm(is_avx512 ? 19 : 11); 112 const Vmm vreg_scales_src0_ = Vmm(is_avx512 ? 22 : 14); 113 const Vmm vreg_scales_src1_ = Vmm(is_avx512 ? 23 : 15); 120 const Vmm vmm_full_mask_ = Vmm(is_avx512_not_mic ? 24 : 5); 121 const Vmm vmm_tmp_gather_ = Vmm(is_avx512_not_mic ? 25 : 6); 122 const Vmm vmm_indices_ = Vmm(is_avx512_not_mic ? 30 : 7); [all …]
|
H A D | jit_uni_x8s8s32x_conv_kernel.hpp | 95 const Vmm vmm_wei = Vmm(0); 97 const Vmm vmm_bias = Vmm(0); 98 const Vmm vmm_comp = Vmm(2); // only for signed input 99 const Vmm vmm_scale = Vmm(1); 101 const Vmm vmm_prev_dst = Vmm(0); 103 const Vmm vmm_zero = Vmm(0); 104 const Vmm vmm_saturation = Vmm(0); 106 const Vmm vmm_zp = Vmm(6); 107 const Vmm vmm_zp_one = Vmm(5); 112 const Vmm vmm_shift = Vmm(1); // only for signed input [all …]
|
H A D | jit_uni_batch_normalization_s8.cpp | 254 Vmm v = Vmm(0); in compute_dst() 255 Vmm vscale = Vmm(1); in compute_dst() 256 Vmm vshift = Vmm(2); in compute_dst() 257 Vmm vmean = Vmm(3); in compute_dst() 309 Vmm tail_vmask = Vmm(11); 310 Vmm body_vmask = Vmm(12); 363 Vmm v0 = Vmm(0); in compute_dst() 365 Vmm v1 = Vmm(1); in compute_dst() 368 Vmm vmean0 = Vmm(4); in compute_dst() 491 Vmm v0 = Vmm(0); in compute_dst() [all …]
|
H A D | jit_uni_reduction_kernel.hpp | 48 template <typename Vmm> 85 const Vmm vmm_tail_load_mask_ = Vmm(0); 86 const Vmm vmm_tail_store_mask_ = Vmm(1); 87 const Vmm vmm_zero_saturation_ = Vmm(2); 88 const Vmm vmm_saturation_ubound_ = Vmm(3); 89 const Vmm vmm_acc_ = Vmm(4); 90 const Vmm vmm_tmp1_ = Vmm(5); 91 const Vmm vmm_tmp2_ = Vmm(6); 92 const Vmm vmm_tmp3_ = Vmm(7); 93 const Vmm vmm_tmp4_ = Vmm(8); [all …]
|
H A D | jit_uni_x8s8s32x_1x1_conv_kernel.hpp | 75 const Vmm vmm_tmp = Vmm(3); 76 const Vmm vmm_one = Vmm(2); 77 const Vmm vmm_zero = Vmm(1); 78 const Vmm vmm_shift = Vmm(1); 79 const Vmm vmm_bcast = Vmm(0); 80 const Vmm vmm_saturation = Vmm(0); 82 const Vmm vmm_scale = Vmm(1); 84 const Vmm vmm_prev_dst = Vmm(1); 87 const Vmm vmm_bias = Vmm(3); 89 const Vmm vmm_zp = Vmm(1); [all …]
|
H A D | jit_avx512_core_x8s8s32x_conv_kernel.hpp | 32 template <typename Vmm> 101 const Vmm vmm_wei = Vmm(31); 103 const Vmm vmm_comp = Vmm(30); // only for signed input 104 const Vmm vmm_bias = Vmm(31); 106 const Vmm vmm_prev_dst = Vmm(31); 108 const Vmm vmm_saturation = Vmm(30); 109 const Vmm vmm_zero = Vmm(31); 114 const Vmm vmm_tmp = Vmm(28); // not used for depthwise 115 const Vmm vmm_one 118 const Vmm vmm_zp = Vmm(25); [all …]
|
H A D | jit_uni_batch_normalization.cpp | 530 Vmm b = Vmm(0); in mean_channels() 553 uni_vaddps(Vmm(ch_idx), Vmm(ch_idx), Vmm(sp_idx++)); in mean_variance_nspc() 572 uni_vfmadd231ps(Vmm(ch_idx), Vmm(30), Vmm(30)); in mean_variance_nspc() 801 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in compute_mean_variance() 861 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in compute_mean_variance() 862 uni_vpxor(Vmm(1), Vmm(1), Vmm(1)); in compute_mean_variance() 933 uni_vpxor(Vmm(1), Vmm(1), Vmm(1)); in compute_mean_variance() 1473 uni_vaddps(Vmm(idx), Vmm(idx), Vmm(idx + 1)); in backward_diff_channels_nspc_compute() 1566 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in backward() 1637 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in backward() [all …]
|
H A D | jit_uni_x8s8s32x_deconvolution.hpp | 40 template <cpu_isa_t isa, typename Vmm> 46 template <cpu_isa_t isa, typename Vmm> 106 const Vmm vmm_tmp_ = Vmm(3); 107 const Vmm vmm_one_ = Vmm(2); 109 const Vmm vmm_zero_ = Vmm(0); 111 const Vmm &vmm_wei_ = vmm_zero_; 114 const Vmm vmm_shift_ = Vmm(1); 115 const Vmm vmm_comp_ = Vmm(1); 116 const Vmm &vmm_bias_ = vmm_zero_; 121 Vmm vmm_bias_alpha() const; [all …]
|
H A D | jit_uni_dw_conv_kernel_f32.hpp | 83 inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); } in get_ker_reg() 84 inline Vmm get_src_reg(int idx) { return Vmm(idx + 1); } in get_src_reg() 90 inline Vmm get_acc_reg(int idx) { return Vmm(get_acc_reg_idx(idx)); } in get_acc_reg() 94 void add_tail_from_mem(Vmm &vmm_acc, Vmm &vmm_tmp, const Xbyak::Reg64 ®, 142 inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); } in get_ker_reg() 143 inline Vmm get_ddst_reg(int idx) { return Vmm(idx + 1); } in get_ddst_reg() 144 inline Vmm get_acc_reg(int idx) { return Vmm(idx + 4); } in get_acc_reg() 217 inline Vmm get_bias_reg(int idx = 0) { return Vmm(idx); } in get_bias_reg() 222 return Vmm(vmm_idx); in get_output_reg() 236 inline Vmm get_aux_reg() { return Vmm(0); } in get_aux_reg() [all …]
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/ |
H A D | jit_uni_resampling_kernel.hpp | 104 const Vmm vmm_tail_mask_ = Vmm(0); 109 const Vmm vmm_full_mask_ = Vmm(1); 110 const Vmm vmm_src_ = Vmm(2); 111 const Vmm vmm_weights_ = Vmm(3); 112 const Vmm vmm_indices_ = Vmm(4); 114 const Vmm vmm_sum_scale_ = Vmm(7); 115 const Vmm vmm_tmp_ = Vmm(8); 151 const Vmm weight_left_ = Vmm(1); 152 const Vmm weight_right_ = Vmm(2); 153 const Vmm weight_top_ = Vmm(3); [all …]
|
H A D | jit_uni_binary_kernel.hpp | 72 using Vmm = typename cpu_isa_traits<isa>::Vmm; typedef 104 const Vmm vmm_tail_vmask_ = Vmm(0); 105 const Vmm vreg_sum_scale_ = Vmm(is_avx512 ? 17 : 9); 107 const Vmm vreg_zero_ = Vmm(is_avx512 ? 18 : 10); 108 const Vmm vreg_one_ = Vmm(is_avx512 ? 19 : 11); 112 const Vmm vreg_scales_src0_ = Vmm(is_avx512 ? 22 : 14); 113 const Vmm vreg_scales_src1_ = Vmm(is_avx512 ? 23 : 15); 120 const Vmm vmm_full_mask_ = Vmm(is_avx512_not_mic ? 24 : 5); 121 const Vmm vmm_tmp_gather_ = Vmm(is_avx512_not_mic ? 25 : 6); 122 const Vmm vmm_indices_ = Vmm(is_avx512_not_mic ? 30 : 7); [all …]
|
H A D | jit_uni_x8s8s32x_conv_kernel.hpp | 99 const Vmm vmm_wei = Vmm(0); 101 const Vmm vmm_bias = Vmm(0); 102 const Vmm vmm_comp = Vmm(2); // only for signed input 103 const Vmm vmm_scale = Vmm(1); 105 const Vmm vmm_prev_dst = Vmm(0); 107 const Vmm vmm_zero = Vmm(0); 108 const Vmm vmm_saturation = Vmm(0); 110 const Vmm vmm_zp = Vmm(6); 111 const Vmm vmm_zp_one = Vmm(5); 116 const Vmm vmm_shift = Vmm(1); // only for signed input [all …]
|
H A D | jit_uni_batch_normalization_s8.cpp | 252 Vmm v = Vmm(0); in compute_dst() 253 Vmm vscale = Vmm(1); in compute_dst() 254 Vmm vshift = Vmm(2); in compute_dst() 255 Vmm vmean = Vmm(3); in compute_dst() 307 Vmm tail_vmask = Vmm(11); 308 Vmm body_vmask = Vmm(12); 361 Vmm v0 = Vmm(0); in compute_dst() 363 Vmm v1 = Vmm(1); in compute_dst() 366 Vmm vmean0 = Vmm(4); in compute_dst() 489 Vmm v0 = Vmm(0); in compute_dst() [all …]
|
H A D | jit_uni_reduction_kernel.hpp | 48 template <typename Vmm> 85 const Vmm vmm_tail_load_mask_ = Vmm(0); 86 const Vmm vmm_tail_store_mask_ = Vmm(1); 87 const Vmm vmm_zero_saturation_ = Vmm(2); 88 const Vmm vmm_saturation_ubound_ = Vmm(3); 89 const Vmm vmm_acc_ = Vmm(4); 90 const Vmm vmm_tmp1_ = Vmm(5); 91 const Vmm vmm_tmp2_ = Vmm(6); 92 const Vmm vmm_tmp3_ = Vmm(7); 93 const Vmm vmm_tmp4_ = Vmm(8); [all …]
|
H A D | jit_uni_x8s8s32x_1x1_conv_kernel.hpp | 76 const Vmm vmm_tmp = Vmm(3); 77 const Vmm vmm_one = Vmm(2); 78 const Vmm vmm_zero = Vmm(1); 79 const Vmm vmm_shift = Vmm(1); 80 const Vmm vmm_bcast = Vmm(0); 81 const Vmm vmm_saturation = Vmm(0); 83 const Vmm vmm_scale = Vmm(1); 85 const Vmm vmm_prev_dst = Vmm(1); 88 const Vmm vmm_bias = Vmm(3); 90 const Vmm vmm_zp = Vmm(1); [all …]
|
H A D | jit_avx512_core_x8s8s32x_conv_kernel.hpp | 32 template <typename Vmm> 105 const Vmm vmm_wei = Vmm(31); 107 const Vmm vmm_comp = Vmm(30); // only for signed input 108 const Vmm vmm_bias = Vmm(31); 110 const Vmm vmm_prev_dst = Vmm(31); 112 const Vmm vmm_saturation = Vmm(30); 113 const Vmm vmm_sum_zp = Vmm(30); 114 const Vmm vmm_zero = Vmm(31); 119 const Vmm vmm_tmp = Vmm(28); // not used for depthwise 123 const Vmm vmm_zp = Vmm(25); [all …]
|
H A D | jit_uni_x8s8s32x_deconvolution.hpp | 41 template <cpu_isa_t isa, typename Vmm> 47 template <cpu_isa_t isa, typename Vmm> 107 const Vmm vmm_tmp_ = Vmm(3); 108 const Vmm vmm_one_ = Vmm(2); 110 const Vmm vmm_zero_ = Vmm(0); 112 const Vmm &vmm_wei_ = vmm_zero_; 115 const Vmm vmm_shift_ = Vmm(1); 116 const Vmm vmm_comp_ = Vmm(1); 117 const Vmm &vmm_bias_ = vmm_zero_; 123 Vmm vmm_bias_alpha() const; [all …]
|
H A D | jit_uni_batch_normalization.cpp | 529 Vmm b = Vmm(0); in mean_channels() 552 uni_vaddps(Vmm(ch_idx), Vmm(ch_idx), Vmm(sp_idx++)); in mean_variance_nspc() 571 uni_vfmadd231ps(Vmm(ch_idx), Vmm(30), Vmm(30)); in mean_variance_nspc() 800 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in compute_mean_variance() 860 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in compute_mean_variance() 861 uni_vpxor(Vmm(1), Vmm(1), Vmm(1)); in compute_mean_variance() 932 uni_vpxor(Vmm(1), Vmm(1), Vmm(1)); in compute_mean_variance() 1472 uni_vaddps(Vmm(idx), Vmm(idx), Vmm(idx + 1)); in backward_diff_channels_nspc_compute() 1565 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in backward() 1636 uni_vpxor(Vmm(0), Vmm(0), Vmm(0)); in backward() [all …]
|
H A D | jit_avx512_core_x8s8s32x_1x1_conv_kernel.hpp | 32 template <typename Vmm> 80 const Vmm vmm_tmp = Vmm(28); 81 const Vmm vmm_saturation = Vmm(28); 82 const Vmm vmm_one = Vmm(29); 83 const Vmm vmm_zero = Vmm(30); 84 const Vmm vmm_prev_dst = Vmm(30); 85 const Vmm vmm_shift = Vmm(30); 86 const Vmm vmm_bcast = Vmm(31); 87 const Vmm vmm_bias_alpha = Vmm(31); 90 const Vmm vmm_zp = Vmm(30); [all …]
|
H A D | jit_uni_dw_conv_kernel_f32.hpp | 84 inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); } in get_ker_reg() 85 inline Vmm get_src_reg(int idx) { return Vmm(idx + 1); } in get_src_reg() 91 inline Vmm get_acc_reg(int idx) { return Vmm(get_acc_reg_idx(idx)); } in get_acc_reg() 95 void add_tail_from_mem(Vmm &vmm_acc, Vmm &vmm_tmp, const Xbyak::Reg64 ®, 143 inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); } in get_ker_reg() 144 inline Vmm get_ddst_reg(int idx) { return Vmm(idx + 1); } in get_ddst_reg() 145 inline Vmm get_acc_reg(int idx) { return Vmm(idx + 4); } in get_acc_reg() 218 inline Vmm get_bias_reg(int idx = 0) { return Vmm(idx); } in get_bias_reg() 223 return Vmm(vmm_idx); in get_output_reg() 237 inline Vmm get_aux_reg() { return Vmm(0); } in get_aux_reg() [all …]
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/injectors/ |
H A D | jit_uni_eltwise_injector.hpp | 188 void vec_shift(const Vmm &vmm_dst, const Vmm &vmm_src, bool shift_left, 190 void compute_cmp_mask(const Vmm &vmm_src, 195 void exp_compute_vector_fwd(const Vmm &vmm_src); 198 void elu_compute_vector_fwd(const Vmm &vmm_src); 201 void abs_compute_vector_fwd(const Vmm &vmm_src); 211 void log_compute_vector_fwd(const Vmm &vmm_src); 213 void pow_compute_vector_fwd(const Vmm &vmm_src); 218 void exp_compute_vector_bwd(const Vmm &vmm_src); 220 void elu_compute_vector_bwd(const Vmm &vmm_src); 223 void abs_compute_vector_bwd(const Vmm &vmm_src); [all …]
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/injectors/ |
H A D | jit_uni_eltwise_injector.hpp | 74 using Vmm = typename cpu_isa_traits<isa>::Vmm; typedef 184 void vec_shift(const Vmm &vmm_dst, const Vmm &vmm_src, bool shift_left, 186 void compute_cmp_mask(const Vmm &vmm_src, 191 void exp_compute_vector_fwd(const Vmm &vmm_src); 194 void elu_compute_vector_fwd(const Vmm &vmm_src); 197 void abs_compute_vector_fwd(const Vmm &vmm_src); 207 void log_compute_vector_fwd(const Vmm &vmm_src); 209 void pow_compute_vector_fwd(const Vmm &vmm_src); 214 void exp_compute_vector_bwd(const Vmm &vmm_src); 216 void elu_compute_vector_bwd(const Vmm &vmm_src); [all …]
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/x64/utils/ |
H A D | jit_io_helper.cpp | 67 template <typename Vmm> 118 template <typename Vmm> 121 template <typename Vmm> 160 void jit_io_helper_t<Vmm>::prepare_i8_data_to_store(const Vmm &i8_vmm) { in prepare_i8_data_to_store() 383 const Vmm vmm_mask = Vmm(gather_conf_->full_vmm_mask_idx_); in init_full_mask() 401 const Vmm &indices_vmm, const Vmm &dst_vmm, const bool tail) { in gather() 406 const Vmm &mask = tail ? Vmm(tail_conf_->tail_vmm_mask_idx_) in gather() 538 void jit_io_helper_t<Vmm>::store(const Vmm &src_raw_vmm, in store() 578 void jit_io_helper_t<Vmm>::saturate(const Vmm &vmm) { in saturate() 587 void jit_io_helper_t<Vmm>::store_byte_by_byte(const Vmm &src_vmm, in store_byte_by_byte() [all …]
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/x64/utils/ |
H A D | jit_io_helper.cpp | 67 template <typename Vmm> 118 template <typename Vmm> 121 template <typename Vmm> 160 void jit_io_helper_t<Vmm>::prepare_i8_data_to_store(const Vmm &i8_vmm) { in prepare_i8_data_to_store() 383 const Vmm vmm_mask = Vmm(gather_conf_->full_vmm_mask_idx_); in init_full_mask() 401 const Vmm &indices_vmm, const Vmm &dst_vmm, const bool tail) { in gather() 406 const Vmm &mask = tail ? Vmm(tail_conf_->tail_vmm_mask_idx_) in gather() 538 void jit_io_helper_t<Vmm>::store(const Vmm &src_raw_vmm, in store() 578 void jit_io_helper_t<Vmm>::saturate(const Vmm &vmm) { in saturate() 587 void jit_io_helper_t<Vmm>::store_byte_by_byte(const Vmm &src_vmm, in store_byte_by_byte() [all …]
|