Home
last modified time | relevance | path

Searched refs:ZReg (Results 1 – 25 of 162) sorted by relevance

1234567

/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/aarch64/
H A Djit_sve_512_x8s8s32x_conv_kernel.hpp129 const ZReg vmm_wei = ZReg(31);
132 const ZReg vmm_bias = ZReg(31);
135 const ZReg vmm_prev_dst = ZReg(31);
137 const ZReg vmm_saturation = ZReg(30);
138 const ZReg vmm_zero = ZReg(31);
142 const ZReg vmm_tmp = ZReg(28); // not used for depthwise
149 const ZReg zmm_wei = ZReg(31);
150 ZReg zmm_tmp = ZReg(0);
151 ZReg zmm_src = ZReg(0);
152 ZReg zmm_shifted_zero = ZReg(0);
[all …]
H A Djit_uni_dw_conv_kernel_f32.hpp91 return ZReg(idx + 0); in get_ker_reg()
99 return ZReg(idx + 1); in get_src_reg()
108 return ZReg(idx + 4); in get_acc_reg()
152 inline ZReg get_ker_reg(int idx) { return ZReg(idx + 0); } in get_ker_reg()
153 inline ZReg get_src_reg(int idx) { return ZReg(idx + 1); } in get_src_reg()
154 inline ZReg get_acc_reg(int idx) { return ZReg(idx + 4); } in get_acc_reg()
201 inline ZReg get_bias_reg(int idx = 0) { return ZReg(idx); } in get_bias_reg()
202 inline ZReg get_output_reg(int idx) { return ZReg(idx + 1); } in get_output_reg()
203 inline ZReg get_input_reg(int idx) { return ZReg(idx + 5); } in get_input_reg()
204 inline ZReg get_acc_reg(int idx) { return ZReg(idx + 2); } in get_acc_reg()
[all …]
H A Djit_uni_batch_normalization_s8.cpp76 ZReg vzero = z29;
77 ZReg vone = z30;
78 ZReg veps = z31;
108 void uni_fmax(const ZReg &dst, const ZReg &src, const ZReg &src2) { in uni_fmax()
187 void compute_vscaleshift(const ZReg &vscale, const ZReg &vshift, in compute_vscaleshift()
188 const ZReg &vmean, const ZReg &vsqrtvar, size_t offt, in compute_vscaleshift()
292 ZReg v = ZReg(0); in compute_dst()
293 ZReg vscale = ZReg(1); in compute_dst()
294 ZReg vshift = ZReg(2); in compute_dst()
295 ZReg vmean = ZReg(3); in compute_dst()
[all …]
H A Djit_uni_pool_kernel.hpp63 ZReg yreg(int idx) const noexcept { return ZReg(reg_idx(idx)); } in yreg()
64 ZReg zreg(int idx) const noexcept { return ZReg(reg_idx(idx)); } in zreg()
68 ZReg ymm_tmp_1 = ZReg(0);
80 ZReg ymm_tmp = ZReg(3);
91 ZReg z_tmp0 = z4;
H A Djit_uni_softmax.cpp147 void uni_fmax(const ZReg &dst, const ZReg &src, const ZReg &src2, in uni_fmax()
347 void store(const XReg &addr, const ZReg &vmm, bool tail = false) { in store()
354 void load(const ZReg &vmm, const XReg &addr, bool tail = false) { in load()
381 void get_horizontal_op(const ZReg &v, const ZReg &vtmp, op_t op) override { in get_horizontal_op()
450 ZReg vreg_tmp_src = ZReg(i + 1); in compute_dst()
469 ZReg vreg_tmp_dst = ZReg(i * 2 + 1); in accumulate_vsbr()
470 ZReg vreg_tmp_diff_dst = ZReg(i * 2 + 2); in accumulate_vsbr()
487 ZReg vreg_tmp_dst = ZReg(i * 2 + 1); in compute_diff_src()
488 ZReg vreg_tmp_diff_dst = ZReg(i * 2 + 2); in compute_diff_src()
H A Djit_uni_1x1_conv_utils.hpp157 Xbyak_aarch64::ZReg reg_zero = Xbyak_aarch64::ZReg(0);
158 Xbyak_aarch64::ZReg reg_v = Xbyak_aarch64::ZReg(1);
184 ZReg res = ZReg(idx); in rtus_driver_t()
187 case sve_512: res = ZReg(idx); break; in rtus_driver_t()
188 default: assert(!"Not supported isa"); res = ZReg(idx); in rtus_driver_t()
195 case 4: res = ZReg(idx); break; in rtus_driver_t()
198 res = ZReg(idx); in rtus_driver_t()
H A Djit_uni_dw_conv_kernel_f32.cpp64 ldr(ZReg(0), ptr(reg_tmp_addr)); in load_src()
103 ZReg zreg_ker = get_ker_reg(0); in apply_filter_unrolled()
116 ZReg zreg_src = get_src_reg(0); in apply_filter_unrolled()
415 ZReg zreg_ker = get_ker_reg(0); in apply_filter()
425 ZReg zreg_src = get_src_reg(0); in apply_filter()
588 ZReg zreg_acc = get_acc_reg(i); in load_filter()
607 ZReg zreg_bias = get_bias_reg(0); in load_bias()
634 ZReg zreg_output = get_output_reg(0); in compute_ow_step_unroll()
724 ldr(ZReg(31), ptr(reg_tmp_addr)); in compute_bias_step_unroll()
741 ZReg zreg_acc = get_acc_reg(i); in store_filter()
[all …]
H A Djit_generator.hpp220 void uni_clear(const Xbyak_aarch64::ZReg &dst) { eor(dst.d, dst.d, dst.d); } in uni_clear()
274 void uni_eor(const Xbyak_aarch64::ZReg &z1, const Xbyak_aarch64::ZReg &z2, in uni_eor()
275 const Xbyak_aarch64::ZReg &z3) { in uni_eor()
287 const Xbyak_aarch64::ZReg &dst, const Xbyak_aarch64::XReg &addr) { in uni_ldr()
297 const Xbyak_aarch64::ZReg &src, const Xbyak_aarch64::XReg &addr) { in uni_str()
H A Djit_sve_512_x8s8s32x_conv_kernel.cpp162 auto zmm_tmp1 = ZReg(31); in store_output()
163 auto zmm_tmp2 = ZReg(30); in store_output()
164 auto zmm_tmp3 = ZReg(29); in store_output()
335 auto compute = [=](ZReg vreg_acc, ZReg vreg_wei, ZReg vreg_src) { in compute_ker_dw()
382 auto zmm_tmp = ZReg(31); in compute_ker_dw()
425 auto zmm_tmp = ZReg(30); in compute_ker_dw()
464 auto zmm_tmp = ZReg(31); in compute_ker_dw()
529 auto compute = [=](ZReg vreg_acc, ZReg vreg_wei, ZReg vreg_src) { in compute_ker()
882 ZReg src, XReg reg_addr, bool mask_flag) { in vmm_load_src()
949 auto vmm_tmp1 = ZReg(31); in generate()
[all …]
H A Djit_sve_512_conv_kernel.cpp136 return ZReg(idx); in store_output()
338 return ZReg(idx); in compute_loop_fma_core()
1190 return ZReg(zreg_idx); in store_output()
1201 return ZReg(idx); in store_output()
2452 ldr(ZReg(zreg_idx), in compute_ic_block_step()
2572 ldr(ZReg(zreg_idx), in compute_ic_block_step()
2704 str(ZReg(zreg_idx), in compute_ic_block_step()
3290 str(ZReg(0), in maybe_zero_kernel()
3327 ldr(ZReg(0), ptr(reg_bias)); in bias_kernel_2d()
3342 str(ZReg(0), ptr(reg_bias)); in bias_kernel_2d()
[all …]
H A Djit_uni_batch_normalization.cpp74 using TReg = typename utils::conditional<isa == asimd, VReg, ZReg>::type;
337 void fwd_process_relu_sve_512_common(ZReg vdst, int offt = 0) { in fwd_process_relu_sve_512_common()
370 void bwd_process_relu_sve_512_common(ZReg vdiff_dst, int offt = 0) { in bwd_process_relu_sve_512_common()
404 void uni_load_spat_data(const ZReg &z, const XReg &x) { ldr(z, ptr(x)); } in uni_load_spat_data()
486 void uni_ldr(const ZReg &z, const XReg &x) { ldr(z, ptr(x)); } in uni_ldr()
510 void uni_str(const ZReg &z, const XReg &base, in uni_str()
516 void uni_stnt1w(const ZReg &z, const XReg &base, in uni_stnt1w()
794 fwd_process_relu_sve_512_common(ZReg(idx)); in forward_channels_nspc_compute()
1162 ZReg(IDX(v)), offt); in forward_channels()
1336 ZReg(IDX(t2)), offt); in backward_sh_channels()
[all …]
/dports/math/onednn/oneDNN-2.5.1/src/cpu/aarch64/
H A Djit_sve_512_x8s8s32x_conv_kernel.hpp129 const ZReg vmm_wei = ZReg(31);
132 const ZReg vmm_bias = ZReg(31);
135 const ZReg vmm_prev_dst = ZReg(31);
137 const ZReg vmm_saturation = ZReg(30);
138 const ZReg vmm_zero = ZReg(31);
142 const ZReg vmm_tmp = ZReg(28); // not used for depthwise
149 const ZReg zmm_wei = ZReg(31);
150 ZReg zmm_tmp = ZReg(0);
151 ZReg zmm_src = ZReg(0);
152 ZReg zmm_shifted_zero = ZReg(0);
[all …]
H A Djit_uni_dw_conv_kernel_f32.hpp91 return ZReg(idx + 0); in get_ker_reg()
99 return ZReg(idx + 1); in get_src_reg()
108 return ZReg(idx + 4); in get_acc_reg()
152 inline ZReg get_ker_reg(int idx) { return ZReg(idx + 0); } in get_ker_reg()
153 inline ZReg get_src_reg(int idx) { return ZReg(idx + 1); } in get_src_reg()
154 inline ZReg get_acc_reg(int idx) { return ZReg(idx + 4); } in get_acc_reg()
201 inline ZReg get_bias_reg(int idx = 0) { return ZReg(idx); } in get_bias_reg()
202 inline ZReg get_output_reg(int idx) { return ZReg(idx + 1); } in get_output_reg()
203 inline ZReg get_input_reg(int idx) { return ZReg(idx + 5); } in get_input_reg()
204 inline ZReg get_acc_reg(int idx) { return ZReg(idx + 2); } in get_acc_reg()
[all …]
H A Djit_uni_batch_normalization_s8.cpp76 ZReg vzero = z29;
77 ZReg vone = z30;
78 ZReg veps = z31;
108 void uni_fmax(const ZReg &dst, const ZReg &src, const ZReg &src2) { in uni_fmax()
187 void compute_vscaleshift(const ZReg &vscale, const ZReg &vshift, in compute_vscaleshift()
188 const ZReg &vmean, const ZReg &vsqrtvar, size_t offt, in compute_vscaleshift()
292 ZReg v = ZReg(0); in compute_dst()
293 ZReg vscale = ZReg(1); in compute_dst()
294 ZReg vshift = ZReg(2); in compute_dst()
295 ZReg vmean = ZReg(3); in compute_dst()
[all …]
H A Djit_uni_pool_kernel.hpp63 ZReg yreg(int idx) const noexcept { return ZReg(reg_idx(idx)); } in yreg()
64 ZReg zreg(int idx) const noexcept { return ZReg(reg_idx(idx)); } in zreg()
68 ZReg ymm_tmp_1 = ZReg(0);
80 ZReg ymm_tmp = ZReg(3);
91 ZReg z_tmp0 = z4;
H A Djit_uni_softmax.cpp147 void uni_fmax(const ZReg &dst, const ZReg &src, const ZReg &src2, in uni_fmax()
347 void store(const XReg &addr, const ZReg &vmm, bool tail = false) { in store()
354 void load(const ZReg &vmm, const XReg &addr, bool tail = false) { in load()
381 void get_horizontal_op(const ZReg &v, const ZReg &vtmp, op_t op) override { in get_horizontal_op()
450 ZReg vreg_tmp_src = ZReg(i + 1); in compute_dst()
469 ZReg vreg_tmp_dst = ZReg(i * 2 + 1); in accumulate_vsbr()
470 ZReg vreg_tmp_diff_dst = ZReg(i * 2 + 2); in accumulate_vsbr()
487 ZReg vreg_tmp_dst = ZReg(i * 2 + 1); in compute_diff_src()
488 ZReg vreg_tmp_diff_dst = ZReg(i * 2 + 2); in compute_diff_src()
H A Djit_uni_1x1_conv_utils.hpp157 Xbyak_aarch64::ZReg reg_zero = Xbyak_aarch64::ZReg(0);
158 Xbyak_aarch64::ZReg reg_v = Xbyak_aarch64::ZReg(1);
184 ZReg res = ZReg(idx); in rtus_driver_t()
187 case sve_512: res = ZReg(idx); break; in rtus_driver_t()
188 default: assert(!"Not supported isa"); res = ZReg(idx); in rtus_driver_t()
195 case 4: res = ZReg(idx); break; in rtus_driver_t()
198 res = ZReg(idx); in rtus_driver_t()
H A Djit_uni_dw_conv_kernel_f32.cpp64 ldr(ZReg(0), ptr(reg_tmp_addr)); in load_src()
103 ZReg zreg_ker = get_ker_reg(0); in apply_filter_unrolled()
116 ZReg zreg_src = get_src_reg(0); in apply_filter_unrolled()
415 ZReg zreg_ker = get_ker_reg(0); in apply_filter()
425 ZReg zreg_src = get_src_reg(0); in apply_filter()
588 ZReg zreg_acc = get_acc_reg(i); in load_filter()
607 ZReg zreg_bias = get_bias_reg(0); in load_bias()
634 ZReg zreg_output = get_output_reg(0); in compute_ow_step_unroll()
724 ldr(ZReg(31), ptr(reg_tmp_addr)); in compute_bias_step_unroll()
741 ZReg zreg_acc = get_acc_reg(i); in store_filter()
[all …]
H A Djit_generator.hpp220 void uni_clear(const Xbyak_aarch64::ZReg &dst) { eor(dst.d, dst.d, dst.d); } in uni_clear()
274 void uni_eor(const Xbyak_aarch64::ZReg &z1, const Xbyak_aarch64::ZReg &z2, in uni_eor()
275 const Xbyak_aarch64::ZReg &z3) { in uni_eor()
287 const Xbyak_aarch64::ZReg &dst, const Xbyak_aarch64::XReg &addr) { in uni_ldr()
297 const Xbyak_aarch64::ZReg &src, const Xbyak_aarch64::XReg &addr) { in uni_str()
H A Djit_sve_512_x8s8s32x_conv_kernel.cpp162 auto zmm_tmp1 = ZReg(31); in store_output()
163 auto zmm_tmp2 = ZReg(30); in store_output()
164 auto zmm_tmp3 = ZReg(29); in store_output()
335 auto compute = [=](ZReg vreg_acc, ZReg vreg_wei, ZReg vreg_src) { in compute_ker_dw()
382 auto zmm_tmp = ZReg(31); in compute_ker_dw()
425 auto zmm_tmp = ZReg(30); in compute_ker_dw()
464 auto zmm_tmp = ZReg(31); in compute_ker_dw()
529 auto compute = [=](ZReg vreg_acc, ZReg vreg_wei, ZReg vreg_src) { in compute_ker()
882 ZReg src, XReg reg_addr, bool mask_flag) { in vmm_load_src()
949 auto vmm_tmp1 = ZReg(31); in generate()
[all …]
H A Djit_sve_512_conv_kernel.cpp136 return ZReg(idx); in store_output()
338 return ZReg(idx); in compute_loop_fma_core()
1190 return ZReg(zreg_idx); in store_output()
1201 return ZReg(idx); in store_output()
2452 ldr(ZReg(zreg_idx), in compute_ic_block_step()
2572 ldr(ZReg(zreg_idx), in compute_ic_block_step()
2704 str(ZReg(zreg_idx), in compute_ic_block_step()
3290 str(ZReg(0), in maybe_zero_kernel()
3327 ldr(ZReg(0), ptr(reg_bias)); in bias_kernel_2d()
3342 str(ZReg(0), ptr(reg_bias)); in bias_kernel_2d()
[all …]
H A Djit_uni_batch_normalization.cpp74 using TReg = typename utils::conditional<isa == asimd, VReg, ZReg>::type;
337 void fwd_process_relu_sve_512_common(ZReg vdst, int offt = 0) { in fwd_process_relu_sve_512_common()
370 void bwd_process_relu_sve_512_common(ZReg vdiff_dst, int offt = 0) { in bwd_process_relu_sve_512_common()
404 void uni_load_spat_data(const ZReg &z, const XReg &x) { ldr(z, ptr(x)); } in uni_load_spat_data()
486 void uni_ldr(const ZReg &z, const XReg &x) { ldr(z, ptr(x)); } in uni_ldr()
510 void uni_str(const ZReg &z, const XReg &base, in uni_str()
516 void uni_stnt1w(const ZReg &z, const XReg &base, in uni_stnt1w()
794 fwd_process_relu_sve_512_common(ZReg(idx)); in forward_channels_nspc_compute()
1162 ZReg(IDX(v)), offt); in forward_channels()
1336 ZReg(IDX(t2)), offt); in backward_sh_channels()
[all …]
/dports/devel/monotone/monotone-1.1/test/func/importing_cvs_problematic_repo/cvs-repository/test/
H A Drcsfile,v233 class HashTable : public HashTableBase<ZReg, ht_iterator<ZReg> > {
253 ZReg* lookup(ZReg* zRegBank);
256 ZReg* insert(register ZReg*);
296 class HashTable : public HashTableBase<ZReg, iterator<ZReg> > {
408 ZReg* lookup(ZReg* zRegBank);
414 ZReg* insert(register ZReg*);
610 inline ZReg* lookup(ZReg* zRegBank);
643 inline ZReg* HashTable::lookup(ZReg* zRegBank) {
891 ZReg* lookup(ZReg* zRegBank) {
1163 return (ZReg*) (item + sizeof(ZReg));
[all …]
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/aarch64/injectors/
H A Djit_uni_eltwise_injector.cpp570 h->str(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_fwd()
576 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_fwd()
782 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in swish_compute_vector_fwd()
789 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in swish_compute_vector_fwd()
1011 h->str(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_bwd()
1017 h->ldr(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_bwd()
1115 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in swish_compute_vector_bwd()
1121 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in swish_compute_vector_bwd()
1172 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in gelu_erf_compute_vector_bwd()
1182 h->ldr(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_erf_compute_vector_bwd()
[all …]
/dports/math/onednn/oneDNN-2.5.1/src/cpu/aarch64/injectors/
H A Djit_uni_eltwise_injector.cpp570 h->str(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_fwd()
576 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_fwd()
782 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in swish_compute_vector_fwd()
789 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in swish_compute_vector_fwd()
1011 h->str(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_bwd()
1017 h->ldr(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_tanh_compute_vector_bwd()
1115 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in swish_compute_vector_bwd()
1121 h->ldr(ZReg(IDX(vmm_aux0)), ptr(h->X_TMP_0)); in swish_compute_vector_bwd()
1172 h->str(ZReg(IDX(vmm_src)), ptr(h->X_TMP_0)); in gelu_erf_compute_vector_bwd()
1182 h->ldr(ZReg(IDX(vmm_aux2)), ptr(h->X_TMP_0)); in gelu_erf_compute_vector_bwd()
[all …]

1234567