/dports/graphics/opencv/opencv-4.5.3/modules/core/src/ |
H A D | mean.simd.hpp | 52 v_int16 v_tmp0, v_tmp1; in operator ()() local 54 v_sqsum += v_dotprod(v_tmp0, v_tmp0) + v_dotprod(v_tmp1, v_tmp1); in operator ()() 65 v_uint32 v_tmp0, v_tmp1; in operator ()() local 66 v_expand(v_src + v_half, v_tmp0, v_tmp1); in operator ()() 67 v_sum += v_reinterpret_as_s32(v_tmp0); in operator ()() 119 v_int16 v_tmp0, v_tmp1; in operator ()() local 120 v_zip(v_src0, v_src1, v_tmp0, v_tmp1); in operator ()() 121 v_sqsum += v_dotprod(v_tmp0, v_tmp0) + v_dotprod(v_tmp1, v_tmp1); in operator ()() 132 v_int32 v_tmp0, v_tmp1; in operator ()() local 133 v_expand(v_src + v_half, v_tmp0, v_tmp1); in operator ()() [all …]
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/3rdparty/mkldnn/src/cpu/aarch64/ |
H A D | jit_uni_softmax.cpp | 89 TReg v_tmp0 = TReg(23); member 386 mov(v_tmp0.s, P_ALL_ONE, v.s); in get_horizontal_op() 387 ext(v_tmp0.b, v.b, 48); in get_horizontal_op() 389 mov(vtmp.d, p_shuff0 / T_m, v_tmp0.d); in get_horizontal_op() 391 uzp2(v_tmp0.d, v.d, v.d); in get_horizontal_op() 392 trn1(vtmp.d, v_tmp0.d, v.d); in get_horizontal_op() 395 trn2(v_tmp0.s, v.s, v.s); in get_horizontal_op() 396 mov(vtmp.s, p_shuff1 / T_m, v_tmp0.s); in get_horizontal_op() 440 mov(v_tmp0.d, vsum.d); in accumulate_vsum() 442 fdiv(vsum.s, p_512 / T_m, v_tmp0.s); in accumulate_vsum()
|
/dports/math/onednn/oneDNN-2.5.1/src/cpu/aarch64/ |
H A D | jit_uni_softmax.cpp | 89 TReg v_tmp0 = TReg(23); member 386 mov(v_tmp0.s, P_ALL_ONE, v.s); in get_horizontal_op() 387 ext(v_tmp0.b, v.b, 48); in get_horizontal_op() 389 mov(vtmp.d, p_shuff0 / T_m, v_tmp0.d); in get_horizontal_op() 391 uzp2(v_tmp0.d, v.d, v.d); in get_horizontal_op() 392 trn1(vtmp.d, v_tmp0.d, v.d); in get_horizontal_op() 395 trn2(v_tmp0.s, v.s, v.s); in get_horizontal_op() 396 mov(vtmp.s, p_shuff1 / T_m, v_tmp0.s); in get_horizontal_op() 440 mov(v_tmp0.d, vsum.d); in accumulate_vsum() 442 fdiv(vsum.s, p_512 / T_m, v_tmp0.s); in accumulate_vsum()
|
/dports/graphics/opencv/opencv-4.5.3/modules/imgproc/src/ |
H A D | smooth.simd.hpp | 1341 v_int16 v_tmp0, v_tmp1; in vlineSmooth3N() local 1476 v_int16 v_tmp0, v_tmp1; in vlineSmooth5N() local 1512 v_res0 += v_dotprod(v_tmp0, v_mul23); in vlineSmooth5N() 1515 v_res2 += v_dotprod(v_tmp0, v_mul23); in vlineSmooth5N() 1665 v_int16 v_tmp0, v_tmp1; in vlineSmooth() local 1708 v_res0 += v_dotprod(v_tmp0, v_mul); in vlineSmooth() 1711 v_res2 += v_dotprod(v_tmp0, v_mul); in vlineSmooth() 1714 v_res4 += v_dotprod(v_tmp0, v_mul); in vlineSmooth() 1717 v_res6 += v_dotprod(v_tmp0, v_mul); in vlineSmooth() 1829 v_res0 += v_dotprod(v_tmp0, v_mul); in vlineSmoothONa_yzy_a() [all …]
|
H A D | resize.cpp | 707 v_int16 v_tmp0, v_tmp1; in vlineResize() local 708 v_zip(v_add_wrap(v_src00,v_128), v_add_wrap(v_src10,v_128), v_tmp0, v_tmp1); in vlineResize() 710 v_int32 v_res0 = v_dotprod(v_tmp0, v_mul); in vlineResize() 715 v_zip(v_add_wrap(v_src01,v_128), v_add_wrap(v_src11,v_128), v_tmp0, v_tmp1); in vlineResize() 716 v_int32 v_res2 = v_dotprod(v_tmp0, v_mul); in vlineResize()
|