/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/aarch64/ |
H A D | jit_sve_512_x8s8s32x_conv_kernel.hpp | 129 const ZReg vmm_wei = ZReg(31); 132 const ZReg vmm_bias = ZReg(31); 135 const ZReg vmm_prev_dst = ZReg(31); 137 const ZReg vmm_saturation = ZReg(30); 138 const ZReg vmm_zero = ZReg(31); 142 const ZReg vmm_tmp = ZReg(28); // not used for depthwise 149 const ZReg zmm_wei = ZReg(31); 150 ZReg zmm_tmp = ZReg(0); 151 ZReg zmm_src = ZReg(0); 152 ZReg zmm_shifted_zero = ZReg(0); [all …]
|
H A D | jit_uni_dw_conv_kernel_f32.hpp | 91 return ZReg(idx + 0); in get_ker_reg() 99 return ZReg(idx + 1); in get_src_reg() 108 return ZReg(idx + 4); in get_acc_reg() 152 inline ZReg get_ker_reg(int idx) { return ZReg(idx + 0); } in get_ker_reg() 153 inline ZReg get_src_reg(int idx) { return ZReg(idx + 1); } in get_src_reg() 154 inline ZReg get_acc_reg(int idx) { return ZReg(idx + 4); } in get_acc_reg() 201 inline ZReg get_bias_reg(int idx = 0) { return ZReg(idx); } in get_bias_reg() 202 inline ZReg get_output_reg(int idx) { return ZReg(idx + 1); } in get_output_reg() 203 inline ZReg get_input_reg(int idx) { return ZReg(idx + 5); } in get_input_reg() 204 inline ZReg get_acc_reg(int idx) { return ZReg(idx + 2); } in get_acc_reg() [all …]
|
H A D | jit_uni_batch_normalization_s8.cpp | 76 ZReg vzero = z29; 77 ZReg vone = z30; 78 ZReg veps = z31; 108 void uni_fmax(const ZReg &dst, const ZReg &src, const ZReg &src2) { in uni_fmax() 187 void compute_vscaleshift(const ZReg &vscale, const ZReg &vshift, in compute_vscaleshift() 188 const ZReg &vmean, const ZReg &vsqrtvar, size_t offt, in compute_vscaleshift() 292 ZReg v = ZReg(0); in compute_dst() 293 ZReg vscale = ZReg(1); in compute_dst() 294 ZReg vshift = ZReg(2); in compute_dst() 295 ZReg vmean = ZReg(3); in compute_dst() [all …]
|
H A D | jit_uni_pool_kernel.hpp | 63 ZReg yreg(int idx) const noexcept { return ZReg(reg_idx(idx)); } in yreg() 64 ZReg zreg(int idx) const noexcept { return ZReg(reg_idx(idx)); } in zreg() 68 ZReg ymm_tmp_1 = ZReg(0); 80 ZReg ymm_tmp = ZReg(3); 91 ZReg z_tmp0 = z4;
|
H A D | jit_uni_softmax.cpp | 147 void uni_fmax(const ZReg &dst, const ZReg &src, const ZReg &src2, in uni_fmax() 347 void store(const XReg &addr, const ZReg &vmm, bool tail = false) { in store() 354 void load(const ZReg &vmm, const XReg &addr, bool tail = false) { in load() 381 void get_horizontal_op(const ZReg &v, const ZReg &vtmp, op_t op) override { in get_horizontal_op() 450 ZReg vreg_tmp_src = ZReg(i + 1); in compute_dst() 469 ZReg vreg_tmp_dst = ZReg(i * 2 + 1); in accumulate_vsbr() 470 ZReg vreg_tmp_diff_dst = ZReg(i * 2 + 2); in accumulate_vsbr() 487 ZReg vreg_tmp_dst = ZReg(i * 2 + 1); in compute_diff_src() 488 ZReg vreg_tmp_diff_dst = ZReg(i * 2 + 2); in compute_diff_src()
|
H A D | jit_uni_1x1_conv_utils.hpp | 157 Xbyak_aarch64::ZReg reg_zero = Xbyak_aarch64::ZReg(0); 158 Xbyak_aarch64::ZReg reg_v = Xbyak_aarch64::ZReg(1); 184 ZReg res = ZReg(idx); in rtus_driver_t() 187 case sve_512: res = ZReg(idx); break; in rtus_driver_t() 188 default: assert(!"Not supported isa"); res = ZReg(idx); in rtus_driver_t() 195 case 4: res = ZReg(idx); break; in rtus_driver_t() 198 res = ZReg(idx); in rtus_driver_t()
|
H A D | jit_uni_dw_conv_kernel_f32.cpp | 64 ldr(ZReg(0), ptr(reg_tmp_addr)); in load_src() 103 ZReg zreg_ker = get_ker_reg(0); in apply_filter_unrolled() 116 ZReg zreg_src = get_src_reg(0); in apply_filter_unrolled() 415 ZReg zreg_ker = get_ker_reg(0); in apply_filter() 425 ZReg zreg_src = get_src_reg(0); in apply_filter() 588 ZReg zreg_acc = get_acc_reg(i); in load_filter() 607 ZReg zreg_bias = get_bias_reg(0); in load_bias() 634 ZReg zreg_output = get_output_reg(0); in compute_ow_step_unroll() 724 ldr(ZReg(31), ptr(reg_tmp_addr)); in compute_bias_step_unroll() 741 ZReg zreg_acc = get_acc_reg(i); in store_filter() [all …]
|
H A D | jit_generator.hpp | 220 void uni_clear(const Xbyak_aarch64::ZReg &dst) { eor(dst.d, dst.d, dst.d); } in uni_clear() 274 void uni_eor(const Xbyak_aarch64::ZReg &z1, const Xbyak_aarch64::ZReg &z2, in uni_eor() 275 const Xbyak_aarch64::ZReg &z3) { in uni_eor() 287 const Xbyak_aarch64::ZReg &dst, const Xbyak_aarch64::XReg &addr) { in uni_ldr() 297 const Xbyak_aarch64::ZReg &src, const Xbyak_aarch64::XReg &addr) { in uni_str()
|
H A D | jit_sve_512_x8s8s32x_conv_kernel.cpp | 162 auto zmm_tmp1 = ZReg(31); in store_output() 163 auto zmm_tmp2 = ZReg(30); in store_output() 164 auto zmm_tmp3 = ZReg(29); in store_output() 335 auto compute = [=](ZReg vreg_acc, ZReg vreg_wei, ZReg vreg_src) { in compute_ker_dw() 382 auto zmm_tmp = ZReg(31); in compute_ker_dw() 425 auto zmm_tmp = ZReg(30); in compute_ker_dw() 464 auto zmm_tmp = ZReg(31); in compute_ker_dw() 529 auto compute = [=](ZReg vreg_acc, ZReg vreg_wei, ZReg vreg_src) { in compute_ker() 882 ZReg src, XReg reg_addr, bool mask_flag) { in vmm_load_src() 949 auto vmm_tmp1 = ZReg(31); in generate() [all …]
|
H A D | jit_sve_512_conv_kernel.cpp | 136 return ZReg(idx); in store_output() 338 return ZReg(idx); in compute_loop_fma_core() 1190 return ZReg(zreg_idx); in store_output() 1201 return ZReg(idx); in store_output() 2452 ldr(ZReg(zreg_idx), in compute_ic_block_step() 2572 ldr(ZReg(zreg_idx), in compute_ic_block_step() 2704 str(ZReg(zreg_idx), in compute_ic_block_step() 3290 str(ZReg(0), in maybe_zero_kernel() 3327 ldr(ZReg(0), ptr(reg_bias)); in bias_kernel_2d() 3342 str(ZReg(0), ptr(reg_bias)); in bias_kernel_2d() [all …]
|
H A D | jit_uni_batch_normalization.cpp | 74 using TReg = typename utils::conditional<isa == asimd, VReg, ZReg>::type; 337 void fwd_process_relu_sve_512_common(ZReg vdst, int offt = 0) { in fwd_process_relu_sve_512_common() 370 void bwd_process_relu_sve_512_common(ZReg vdiff_dst, int offt = 0) { in bwd_process_relu_sve_512_common() 404 void uni_load_spat_data(const ZReg &z, const XReg &x) { ldr(z, ptr(x)); } in uni_load_spat_data() 486 void uni_ldr(const ZReg &z, const XReg &x) { ldr(z, ptr(x)); } in uni_ldr() 510 void uni_str(const ZReg &z, const XReg &base, in uni_str() 516 void uni_stnt1w(const ZReg &z, const XReg &base, in uni_stnt1w() 794 fwd_process_relu_sve_512_common(ZReg(idx)); in forward_channels_nspc_compute() 1162 ZReg(IDX(v)), offt); in forward_channels() 1336 ZReg(IDX(t2)), offt); in backward_sh_channels() [all …]
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/aarch64/ |
H A D | jit_sve_512_x8s8s32x_conv_kernel.hpp | 129 const ZReg vmm_wei = ZReg(31); 132 const ZReg vmm_bias = ZReg(31); 135 const ZReg vmm_prev_dst = ZReg(31); 137 const ZReg vmm_saturation = ZReg(30); 138 const ZReg vmm_zero = ZReg(31); 142 const ZReg vmm_tmp = ZReg(28); // not used for depthwise 149 const ZReg zmm_wei = ZReg(31); 150 ZReg zmm_tmp = ZReg(0); 151 ZReg zmm_src = ZReg(0); 152 ZReg zmm_shifted_zero = ZReg(0); [all …]
|
H A D | jit_uni_dw_conv_kernel_f32.hpp | 91 return ZReg(idx + 0); in get_ker_reg() 99 return ZReg(idx + 1); in get_src_reg() 108 return ZReg(idx + 4); in get_acc_reg() 152 inline ZReg get_ker_reg(int idx) { return ZReg(idx + 0); } in get_ker_reg() 153 inline ZReg get_src_reg(int idx) { return ZReg(idx + 1); } in get_src_reg() 154 inline ZReg get_acc_reg(int idx) { return ZReg(idx + 4); } in get_acc_reg() 201 inline ZReg get_bias_reg(int idx = 0) { return ZReg(idx); } in get_bias_reg() 202 inline ZReg get_output_reg(int idx) { return ZReg(idx + 1); } in get_output_reg() 203 inline ZReg get_input_reg(int idx) { return ZReg(idx + 5); } in get_input_reg() 204 inline ZReg get_acc_reg(int idx) { return ZReg(idx + 2); } in get_acc_reg() [all …]
|
H A D | jit_uni_batch_normalization_s8.cpp | 76 ZReg vzero = z29; 77 ZReg vone = z30; 78 ZReg veps = z31; 108 void uni_fmax(const ZReg &dst, const ZReg &src, const ZReg &src2) { in uni_fmax() 187 void compute_vscaleshift(const ZReg &vscale, const ZReg &vshift, in compute_vscaleshift() 188 const ZReg &vmean, const ZReg &vsqrtvar, size_t offt, in compute_vscaleshift() 292 ZReg v = ZReg(0); in compute_dst() 293 ZReg vscale = ZReg(1); in compute_dst() 294 ZReg vshift = ZReg(2); in compute_dst() 295 ZReg vmean = ZReg(3); in compute_dst() [all …]
|
H A D | jit_uni_pool_kernel.hpp | 63 ZReg yreg(int idx) const noexcept { return ZReg(reg_idx(idx)); } in yreg() 64 ZReg zreg(int idx) const noexcept { return ZReg(reg_idx(idx)); } in zreg() 68 ZReg ymm_tmp_1 = ZReg(0); 80 ZReg ymm_tmp = ZReg(3); 91 ZReg z_tmp0 = z4;
|
H A D | jit_uni_softmax.cpp | 147 void uni_fmax(const ZReg &dst, const ZReg &src, const ZReg &src2, in uni_fmax() 347 void store(const XReg &addr, const ZReg &vmm, bool tail = false) { in store() 354 void load(const ZReg &vmm, const XReg &addr, bool tail = false) { in load() 381 void get_horizontal_op(const ZReg &v, const ZReg &vtmp, op_t op) override { in get_horizontal_op() 450 ZReg vreg_tmp_src = ZReg(i + 1); in compute_dst() 469 ZReg vreg_tmp_dst = ZReg(i * 2 + 1); in accumulate_vsbr() 470 ZReg vreg_tmp_diff_dst = ZReg(i * 2 + 2); in accumulate_vsbr() 487 ZReg vreg_tmp_dst = ZReg(i * 2 + 1); in compute_diff_src() 488 ZReg vreg_tmp_diff_dst = ZReg(i * 2 + 2); in compute_diff_src()
|
H A D | jit_uni_1x1_conv_utils.hpp | 157 Xbyak_aarch64::ZReg reg_zero = Xbyak_aarch64::ZReg(0); 158 Xbyak_aarch64::ZReg reg_v = Xbyak_aarch64::ZReg(1); 184 ZReg res = ZReg(idx); in rtus_driver_t() 187 case sve_512: res = ZReg(idx); break; in rtus_driver_t() 188 default: assert(!"Not supported isa"); res = ZReg(idx); in rtus_driver_t() 195 case 4: res = ZReg(idx); break; in rtus_driver_t() 198 res = ZReg(idx); in rtus_driver_t()
|
H A D | jit_uni_dw_conv_kernel_f32.cpp | 64 ldr(ZReg(0), ptr(reg_tmp_addr)); in load_src() 103 ZReg zreg_ker = get_ker_reg(0); in apply_filter_unrolled() 116 ZReg zreg_src = get_src_reg(0); in apply_filter_unrolled() 415 ZReg zreg_ker = get_ker_reg(0); in apply_filter() 425 ZReg zreg_src = get_src_reg(0); in apply_filter() 588 ZReg zreg_acc = get_acc_reg(i); in load_filter() 607 ZReg zreg_bias = get_bias_reg(0); in load_bias() 634 ZReg zreg_output = get_output_reg(0); in compute_ow_step_unroll() 724 ldr(ZReg(31), ptr(reg_tmp_addr)); in compute_bias_step_unroll() 741 ZReg zreg_acc = get_acc_reg(i); in store_filter() [all …]
|
H A D | jit_generator.hpp | 220 void uni_clear(const Xbyak_aarch64::ZReg &dst) { eor(dst.d, dst.d, dst.d); } in uni_clear() 274 void uni_eor(const Xbyak_aarch64::ZReg &z1, const Xbyak_aarch64::ZReg &z2, in uni_eor() 275 const Xbyak_aarch64::ZReg &z3) { in uni_eor() 287 const Xbyak_aarch64::ZReg &dst, const Xbyak_aarch64::XReg &addr) { in uni_ldr() 297 const Xbyak_aarch64::ZReg &src, const Xbyak_aarch64::XReg &addr) { in uni_str()
|
H A D | jit_sve_512_x8s8s32x_conv_kernel.cpp | 162 auto zmm_tmp1 = ZReg(31); in store_output() 163 auto zmm_tmp2 = ZReg(30); in store_output() 164 auto zmm_tmp3 = ZReg(29); in store_output() 335 auto compute = [=](ZReg vreg_acc, ZReg vreg_wei, ZReg vreg_src) { in compute_ker_dw() 382 auto zmm_tmp = ZReg(31); in compute_ker_dw() 425 auto zmm_tmp = ZReg(30); in compute_ker_dw() 464 auto zmm_tmp = ZReg(31); in compute_ker_dw() 529 auto compute = [=](ZReg vreg_acc, ZReg vreg_wei, ZReg vreg_src) { in compute_ker() 882 ZReg src, XReg reg_addr, bool mask_flag) { in vmm_load_src() 949 auto vmm_tmp1 = ZReg(31); in generate() [all …]
|
H A D | jit_sve_512_conv_kernel.cpp | 136 return ZReg(idx); in store_output() 338 return ZReg(idx); in compute_loop_fma_core() 1190 return ZReg(zreg_idx); in store_output() 1201 return ZReg(idx); in store_output() 2452 ldr(ZReg(zreg_idx), in compute_ic_block_step() 2572 ldr(ZReg(zreg_idx), in compute_ic_block_step() 2704 str(ZReg(zreg_idx), in compute_ic_block_step() 3290 str(ZReg(0), in maybe_zero_kernel() 3327 ldr(ZReg(0), ptr(reg_bias)); in bias_kernel_2d() 3342 str(ZReg(0), ptr(reg_bias)); in bias_kernel_2d() [all …]
|
H A D | jit_uni_batch_normalization.cpp | 74 using TReg = typename utils::conditional<isa == asimd, VReg, ZReg>::type; 337 void fwd_process_relu_sve_512_common(ZReg vdst, int offt = 0) { in fwd_process_relu_sve_512_common() 370 void bwd_process_relu_sve_512_common(ZReg vdiff_dst, int offt = 0) { in bwd_process_relu_sve_512_common() 404 void uni_load_spat_data(const ZReg &z, const XReg &x) { ldr(z, ptr(x)); } in uni_load_spat_data() 486 void uni_ldr(const ZReg &z, const XReg &x) { ldr(z, ptr(x)); } in uni_ldr() 510 void uni_str(const ZReg &z, const XReg &base, in uni_str() 516 void uni_stnt1w(const ZReg &z, const XReg &base, in uni_stnt1w() 794 fwd_process_relu_sve_512_common(ZReg(idx)); in forward_channels_nspc_compute() 1162 ZReg(IDX(v)), offt); in forward_channels() 1336 ZReg(IDX(t2)), offt); in backward_sh_channels() [all …]
|
/dports/devel/monotone/monotone-1.1/test/func/importing_cvs_problematic_repo/cvs-repository/test/ |
H A D | rcsfile,v | 233 class HashTable : public HashTableBase<ZReg, ht_iterator<ZReg> > { 253 ZReg* lookup(ZReg* zRegBank); 256 ZReg* insert(register ZReg*); 296 class HashTable : public HashTableBase<ZReg, iterator<ZReg> > { 408 ZReg* lookup(ZReg* zRegBank); 414 ZReg* insert(register ZReg*); 610 inline ZReg* lookup(ZReg* zRegBank); 643 inline ZReg* HashTable::lookup(ZReg* zRegBank) { 891 ZReg* lookup(ZReg* zRegBank) { 1163 return (ZReg*) (item + sizeof(ZReg)); [all …]
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/aarch64/injectors/ |
H A D | jit_uni_eltwise_injector.cpp | 570 h->str(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_fwd() 576 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_fwd() 782 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in swish_compute_vector_fwd() 789 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in swish_compute_vector_fwd() 1011 h->str(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_bwd() 1017 h->ldr(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_bwd() 1115 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in swish_compute_vector_bwd() 1121 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in swish_compute_vector_bwd() 1172 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in gelu_erf_compute_vector_bwd() 1182 h->ldr(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_erf_compute_vector_bwd() [all …]
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/aarch64/injectors/ |
H A D | jit_uni_eltwise_injector.cpp | 570 h->str(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_fwd() 576 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_fwd() 782 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in swish_compute_vector_fwd() 789 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in swish_compute_vector_fwd() 1011 h->str(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_bwd() 1017 h->ldr(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_bwd() 1115 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in swish_compute_vector_bwd() 1121 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in swish_compute_vector_bwd() 1172 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in gelu_erf_compute_vector_bwd() 1182 h->ldr(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_erf_compute_vector_bwd() [all …]
|