Home
last modified time | relevance | path

Searched refs:simd_size (Results 1 – 25 of 199) sorted by relevance

12345678

/dports/graphics/opencv/opencv-4.5.3/modules/core/test/
H A Dtest_intrin.cpp57 #define DEFINE_SIMD_TESTS(simd_size, cpu_opt) \ argument
58 TEST(hal_intrin ## simd_size, uint8x16_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_u…
59 TEST(hal_intrin ## simd_size, int8x16_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_i…
60 TEST(hal_intrin ## simd_size, uint16x8_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_u…
61 TEST(hal_intrin ## simd_size, int16x8_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_i…
62 TEST(hal_intrin ## simd_size, int32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_i…
63 TEST(hal_intrin ## simd_size, uint32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_u…
64 TEST(hal_intrin ## simd_size, uint64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_u…
65 TEST(hal_intrin ## simd_size, int64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_i…
66 TEST(hal_intrin ## simd_size, float32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_f…
[all …]
/dports/math/onednn/oneDNN-2.5.1/src/gpu/jit/conv/
H A Dfma_support.hpp154 int simd_size; member in dnnl::impl::gpu::jit::dpas_t
163 dpas_t(bool is_dpasw, int simd_size, int sdepth, int rcount, in dpas_t() argument
167 , simd_size(simd_size) in dpas_t()
180 static func_t make(const type_t &dst_type, int simd_size, in IR_DECL_DERIVED_TYPE_ID()
193 && (simd_size == other.simd_size) in is_equal()
219 int dst_size() const { return simd_size * dst_type.size(); } in dst_size()
246 int get_simd_size() const { return simd_size; } in get_simd_size()
252 int simd_size; member in dnnl::impl::gpu::jit::mad_t
262 , simd_size(simd_size) in mad_t()
266 ir_assert(math::is_pow2(simd_size)); in mad_t()
[all …]
H A Dfma_support.cpp93 if (simd_size == 8) { in a_layout()
97 if (simd_size == 16) { in a_layout()
114 std::vector<dim_t> dims = {rcount, simd_size}; in c_layout()
119 int m_blk = simd_size; in matches()
H A Dconfig.hpp539 oc_thr_blk = simd_size; in init_bwd_w()
540 ic_thr_blk = (ic < simd_size ? utils::rnd_up_pow2(ic) : simd_size); in init_bwd_w()
542 std::min(utils::div_up(simd_size, ic_thr_blk), kw)); in init_bwd_w()
862 bool is_small_ic() const { return ic < simd_size; } in is_small_ic()
873 lws[i] = tg_grid_dim[i] * (i == 0 ? simd_size : 1); in nd_range()
920 int simd_size; // SIMD width. member in dnnl::impl::gpu::jit::conv_config_t
1117 simd_size = fma_kind::get_simd_size( in init_fma_kind()
1132 simd_size = fma_kind::get_simd_size( in init_fma_kind()
1139 simd_size = fma_kind::get_simd_size( in init_fma_kind()
1180 if (simd_size > max_simd_size) simd_size = max_simd_size; in init_data_layouts()
[all …]
H A Dgen_convolution.cpp138 primitive, engine, cfg.simd_size, cfg.regs, with_dpas); in init()
186 {utils::div_up(size, bytes_per_thr) * cfg.simd_size}); in execute()
369 this, engine, cfg.simd_size, cfg.regs, with_dpas); in init()
388 {utils::div_up(elems, elems_per_thr) * cfg.simd_size, 1, 1}); in execute()
401 {utils::div_up(elems, elems_per_thr) * cfg.simd_size, 1, 1}); in execute()
/dports/math/mfem/mfem-4.3/linalg/
H A Dsimd.hpp79 static const int simd_size = MFEM_SIMD_BYTES/sizeof(real_t); member
81 typedef AutoSIMD<complex_t, simd_size, MFEM_SIMD_BYTES> vcomplex_t;
82 typedef AutoSIMD<real_t, simd_size, MFEM_SIMD_BYTES> vreal_t;
83 typedef AutoSIMD<int, simd_size, simd_size*sizeof(int)> vint_t;
96 static const int simd_size = 1; member
98 typedef AutoSIMD<complex_t, simd_size, align_bytes> vcomplex_t;
99 typedef AutoSIMD<real_t, simd_size, align_bytes> vreal_t;
100 typedef AutoSIMD<int, simd_size, simd_size*sizeof(int)> vint_t;
/dports/devel/xsimd/xsimd-7.6.0/include/xsimd/stl/
H A Dalgorithms.hpp26 std::size_t simd_size = traits::size; in transform() local
32 std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size); in transform()
43 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform()
62 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform()
83 std::size_t simd_size = traits::size; in transform() local
101 for (std::size_t i = align_begin_1; i < align_end; i += simd_size) \ in transform()
153 constexpr std::size_t simd_size = traits::size;
155 if(size < simd_size)
179 ptr += simd_size;
180 for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
[all …]
/dports/devel/py-pythran/pythran-0.11.0/third_party/xsimd/stl/
H A Dalgorithms.hpp31 std::size_t simd_size = batch_type::size; in transform() local
37 std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size); in transform()
47 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform()
65 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform()
85 std::size_t simd_size = batch_type::size; in transform() local
103 for (std::size_t i = align_begin_1; i < align_end; i += simd_size) \ in transform()
152 constexpr std::size_t simd_size = batch_type::size;
154 if (size < simd_size)
177 ptr += simd_size;
178 for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
[all …]
/dports/devel/py-pythran/pythran-0.11.0/pythran/xsimd/stl/
H A Dalgorithms.hpp31 std::size_t simd_size = batch_type::size; in transform() local
37 std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size); in transform()
47 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform()
65 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform()
85 std::size_t simd_size = batch_type::size; in transform() local
103 for (std::size_t i = align_begin_1; i < align_end; i += simd_size) \ in transform()
154 constexpr std::size_t simd_size = batch_type::size;
156 if(size < simd_size)
179 ptr += simd_size;
180 for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
[all …]
/dports/science/lammps/lammps-stable_29Sep2021/lib/gpu/
H A Dlal_neighbor_gpu.cu190 int simd_size = simd_size(); in calc_neigh_list_cell() local
191 int subgroup_id_local = tid / simd_size; in calc_neigh_list_cell()
193 int lane_id = tid % simd_size; in calc_neigh_list_cell()
209 simd_size + lane_id; in calc_neigh_list_cell()
218 int end_idx = simd_size; in calc_neigh_list_cell()
231 offset += simd_size; in calc_neigh_list_cell()
236 offset = subgroup_id_local * simd_size; in calc_neigh_list_cell()
250 int remainder = num_iter % simd_size; in calc_neigh_list_cell()
286 end_idx = simd_size; in calc_neigh_list_cell()
288 cell_pos += simd_size; in calc_neigh_list_cell()
[all …]
H A Dlal_aux_fun1.h235 block_reduce_add1(simd_size(), red_acc, tid, energy); \
244 block_reduce_arr(6, simd_size(), red_acc, tid, virial); \
289 block_reduce_add2(simd_size(), red_acc, tid, energy, e_coul); \
300 block_reduce_arr(6, simd_size(), red_acc, tid, virial); \
385 const int vwidth = simd_size(); \
386 const int voffset = tid & (simd_size() - 1); \
387 const int bnum = tid/simd_size(); \
388 int active_subgs = BLOCK_SIZE_X/simd_size(); \
390 if (active_subgs < BLOCK_SIZE_X/simd_size()) __syncthreads(); \
463 const int vwidth = simd_size(); \
[all …]
H A Dlal_neighbor_shared.h79 const int simd_size) { in setup_auto_cell_size() argument
84 _simd_size = simd_size; in setup_auto_cell_size()
/dports/devel/ispc/ispc-1.16.1/examples/xpu/sgemm/
H A DNaiveCMSgemm_genx.cpp62 const int simd_size = 16; local
63 vector<float, simd_size> a, c; // assuming this is 2 consequent registers
66 vector<uint, simd_size> off(init_0_15);
67 vector<svmptr_t, simd_size> offseta;
69 vector<svmptr_t, simd_size> offsetc;
72 for (; m < end; m += simd_size) {
74 if (m + simd_size > end) {
100 vector<float, simd_size> a_rem, c_rem;
101 vector<svmptr_t, simd_size> a_rem_offset, c_rem_offset;
/dports/graphics/opencv/opencv-4.5.3/modules/dnn/src/ocl4dnn/src/
H A Docl4dnn_conv_spatial.cpp348 int32_t simd_size; in setupKernelDetails() local
351 simd_size = blockN; in setupKernelDetails()
357 if (simd_size == 16) in setupKernelDetails()
394 addDef("FILTERS_IN_GROUP", ((int)alignSize(M_, simd_size) / simd_size)); in setupKernelDetails()
424 simd_size = blockK; in setupKernelDetails()
1418 int32_t simd_size = blockK; in createGEMMLikeConvKernel() local
1423 gy = alignSize(gy, simd_size); in createGEMMLikeConvKernel()
1484 blockN_ = simd_size; in createIDLFKernel()
1616 if (simd_size != 8 && simd_size != 16) in generate_idlf_tuneritems()
1650 if (tile_x > simd_size) in generate_idlf_tuneritems()
[all …]
/dports/devel/xsimd/xsimd-7.6.0/docs/source/
H A Dbasic_usage.rst54 constexpr std::size_t simd_size = xsimd::simd_type<double>::size;
55 std::size_t vec_size = size - size % simd_size;
57 for(std::size_t i = 0; i < vec_size; i += simd_size)
/dports/devel/intel-graphics-compiler/intel-graphics-compiler-igc-1.0.9636/IGC/AdaptorCommon/
H A DProcessFuncAttributes.cpp899 int simd_size = 0; in checkKernelSimdSize() local
908 if (sz != 0 && simd_size == 0) in checkKernelSimdSize()
909 simd_size = sz; in checkKernelSimdSize()
913 IGC_ASSERT_MESSAGE(simd_size == sz, "Function is called with different sub group size"); in checkKernelSimdSize()
916 if (simd_size != 0) in checkKernelSimdSize()
918 IGC_ASSERT_MESSAGE((simd_size == 8) || (simd_size == 16) || (simd_size == 32), in checkKernelSimdSize()
927 sgHandle->setSIMD_size(simd_size); in checkKernelSimdSize()
929 else if (sgHandle->getSIMD_size() != simd_size) in checkKernelSimdSize()
/dports/devel/intel-graphics-compiler/intel-graphics-compiler-igc-1.0.9636/IGC/Compiler/
H A DSPIRMetaDataTranslation.cpp175 int simd_size = reqdSubGroupSize->getSIMD_Size(); in runOnModule() local
176 if (!((simd_size == 8) || (simd_size == 16) || (simd_size == 32))) in runOnModule()
181 sgHandle->setSIMD_size(simd_size); in runOnModule()
/dports/math/xtensor/xtensor-0.24.0/include/xtensor/
H A Dxassign.hpp354 …static constexpr bool simd_assign() { return convertible_types() && simd_size() && simd_interface(… in simd_assign()
632 constexpr size_type simd_size = simd_type::size; in run() local
635 …size_type align_begin = is_aligned ? 0 : xt_simd::get_alignment_offset(e1.data(), size, simd_size); in run()
636 size_type align_end = align_begin + ((size - align_begin) & ~(simd_size - 1)); in run()
644 tbb::parallel_for(align_begin, align_end, simd_size, [&e1, &e2](size_t i) in run()
653 for (size_type i = align_begin; i < align_end; i += simd_size) in run()
659 i += static_cast<std::ptrdiff_t>(simd_size)) in run()
668 for (size_type i = align_begin; i < align_end; i += simd_size) in run()
674 for (size_type i = align_begin; i < align_end; i += simd_size) in run()
944 std::size_t simd_size = inner_loop_size / simd_type::size; in run() local
[all …]
/dports/devel/intel-graphics-compiler/intel-graphics-compiler-igc-1.0.9636/IGC/ZEBinWriter/zebin/source/autogen/
H A DZEInfo.hpp35 …required_work_group_size == other.required_work_group_size && simd_size == other.simd_size && slm_… in operator ==()
53 zeinfo_int32_t simd_size = 0; member
/dports/devel/radare2/radare2-5.1.1/libr/asm/arch/arm/aarch64/
H A Daarch64-dis.c840 unsigned simd_size; in decode_limm() local
851 simd_size = 64; in decode_limm()
857 if (S >= 0x00 && S <= 0x1f) { simd_size = 32; } in decode_limm()
866 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break; in decode_limm()
867 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break; in decode_limm()
874 mask = (1ull << simd_size) - 1; in decode_limm()
876 R &= simd_size - 1; in decode_limm()
879 if (simd_size > esize * 8) in decode_limm()
883 if (S == simd_size - 1) in decode_limm()
890 imm = ((imm << (simd_size - R)) & mask) | (imm >> R); in decode_limm()
[all …]
/dports/graphics/mesa-dri-classic/mesa-20.2.3/src/mesa/drivers/dri/i965/
H A Dbrw_cs.h34 unsigned simd_size; member
/dports/devel/intel-graphics-compiler/intel-graphics-compiler-igc-1.0.9636/IGC/ZEBinWriter/tools/
H A DTester.cpp27 k1.execution_env.simd_size = 8; in getTestZEInfo()
91 k2.execution_env.simd_size = 16; in getTestZEInfo()
/dports/lang/gnatdroid-binutils-x86/binutils-2.27/opcodes/
H A Daarch64-dis.c725 unsigned simd_size; in aarch64_ext_limm() local
745 simd_size = 64; in aarch64_ext_limm()
752 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break; in aarch64_ext_limm()
753 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break; in aarch64_ext_limm()
754 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break; in aarch64_ext_limm()
755 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break; in aarch64_ext_limm()
759 mask = (1ull << simd_size) - 1; in aarch64_ext_limm()
761 R &= simd_size - 1; in aarch64_ext_limm()
764 if (S == simd_size - 1) in aarch64_ext_limm()
771 imm = ((imm << (simd_size - R)) & mask) | (imm >> R); in aarch64_ext_limm()
[all …]
/dports/lang/gnatdroid-binutils/binutils-2.27/opcodes/
H A Daarch64-dis.c725 unsigned simd_size; in aarch64_ext_limm() local
745 simd_size = 64; in aarch64_ext_limm()
752 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break; in aarch64_ext_limm()
753 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break; in aarch64_ext_limm()
754 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break; in aarch64_ext_limm()
755 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break; in aarch64_ext_limm()
759 mask = (1ull << simd_size) - 1; in aarch64_ext_limm()
761 R &= simd_size - 1; in aarch64_ext_limm()
764 if (S == simd_size - 1) in aarch64_ext_limm()
771 imm = ((imm << (simd_size - R)) & mask) | (imm >> R); in aarch64_ext_limm()
[all …]
/dports/devel/gdb761/gdb-7.6.1/opcodes/
H A Daarch64-dis.c706 unsigned simd_size; in aarch64_ext_limm() local
726 simd_size = 64; in aarch64_ext_limm()
733 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break; in aarch64_ext_limm()
734 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break; in aarch64_ext_limm()
735 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break; in aarch64_ext_limm()
736 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break; in aarch64_ext_limm()
740 mask = (1ull << simd_size) - 1; in aarch64_ext_limm()
742 R &= simd_size - 1; in aarch64_ext_limm()
745 if (S == simd_size - 1) in aarch64_ext_limm()
752 imm = ((imm << (simd_size - R)) & mask) | (imm >> R); in aarch64_ext_limm()
[all …]

12345678