/dports/graphics/opencv/opencv-4.5.3/modules/core/test/ |
H A D | test_intrin.cpp | 57 #define DEFINE_SIMD_TESTS(simd_size, cpu_opt) \ argument 58 TEST(hal_intrin ## simd_size, uint8x16_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_u… 59 TEST(hal_intrin ## simd_size, int8x16_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_i… 60 TEST(hal_intrin ## simd_size, uint16x8_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_u… 61 TEST(hal_intrin ## simd_size, int16x8_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_i… 62 TEST(hal_intrin ## simd_size, int32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_i… 63 TEST(hal_intrin ## simd_size, uint32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_u… 64 TEST(hal_intrin ## simd_size, uint64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_u… 65 TEST(hal_intrin ## simd_size, int64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_i… 66 TEST(hal_intrin ## simd_size, float32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_f… [all …]
|
/dports/math/onednn/oneDNN-2.5.1/src/gpu/jit/conv/ |
H A D | fma_support.hpp | 154 int simd_size; member in dnnl::impl::gpu::jit::dpas_t 163 dpas_t(bool is_dpasw, int simd_size, int sdepth, int rcount, in dpas_t() argument 167 , simd_size(simd_size) in dpas_t() 180 static func_t make(const type_t &dst_type, int simd_size, in IR_DECL_DERIVED_TYPE_ID() 193 && (simd_size == other.simd_size) in is_equal() 219 int dst_size() const { return simd_size * dst_type.size(); } in dst_size() 246 int get_simd_size() const { return simd_size; } in get_simd_size() 252 int simd_size; member in dnnl::impl::gpu::jit::mad_t 262 , simd_size(simd_size) in mad_t() 266 ir_assert(math::is_pow2(simd_size)); in mad_t() [all …]
|
H A D | fma_support.cpp | 93 if (simd_size == 8) { in a_layout() 97 if (simd_size == 16) { in a_layout() 114 std::vector<dim_t> dims = {rcount, simd_size}; in c_layout() 119 int m_blk = simd_size; in matches()
|
H A D | config.hpp | 539 oc_thr_blk = simd_size; in init_bwd_w() 540 ic_thr_blk = (ic < simd_size ? utils::rnd_up_pow2(ic) : simd_size); in init_bwd_w() 542 std::min(utils::div_up(simd_size, ic_thr_blk), kw)); in init_bwd_w() 862 bool is_small_ic() const { return ic < simd_size; } in is_small_ic() 873 lws[i] = tg_grid_dim[i] * (i == 0 ? simd_size : 1); in nd_range() 920 int simd_size; // SIMD width. member in dnnl::impl::gpu::jit::conv_config_t 1117 simd_size = fma_kind::get_simd_size( in init_fma_kind() 1132 simd_size = fma_kind::get_simd_size( in init_fma_kind() 1139 simd_size = fma_kind::get_simd_size( in init_fma_kind() 1180 if (simd_size > max_simd_size) simd_size = max_simd_size; in init_data_layouts() [all …]
|
H A D | gen_convolution.cpp | 138 primitive, engine, cfg.simd_size, cfg.regs, with_dpas); in init() 186 {utils::div_up(size, bytes_per_thr) * cfg.simd_size}); in execute() 369 this, engine, cfg.simd_size, cfg.regs, with_dpas); in init() 388 {utils::div_up(elems, elems_per_thr) * cfg.simd_size, 1, 1}); in execute() 401 {utils::div_up(elems, elems_per_thr) * cfg.simd_size, 1, 1}); in execute()
|
/dports/math/mfem/mfem-4.3/linalg/ |
H A D | simd.hpp | 79 static const int simd_size = MFEM_SIMD_BYTES/sizeof(real_t); member 81 typedef AutoSIMD<complex_t, simd_size, MFEM_SIMD_BYTES> vcomplex_t; 82 typedef AutoSIMD<real_t, simd_size, MFEM_SIMD_BYTES> vreal_t; 83 typedef AutoSIMD<int, simd_size, simd_size*sizeof(int)> vint_t; 96 static const int simd_size = 1; member 98 typedef AutoSIMD<complex_t, simd_size, align_bytes> vcomplex_t; 99 typedef AutoSIMD<real_t, simd_size, align_bytes> vreal_t; 100 typedef AutoSIMD<int, simd_size, simd_size*sizeof(int)> vint_t;
|
/dports/devel/xsimd/xsimd-7.6.0/include/xsimd/stl/ |
H A D | algorithms.hpp | 26 std::size_t simd_size = traits::size; in transform() local 32 std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size); in transform() 43 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform() 62 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform() 83 std::size_t simd_size = traits::size; in transform() local 101 for (std::size_t i = align_begin_1; i < align_end; i += simd_size) \ in transform() 153 constexpr std::size_t simd_size = traits::size; 155 if(size < simd_size) 179 ptr += simd_size; 180 for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size) [all …]
|
/dports/devel/py-pythran/pythran-0.11.0/third_party/xsimd/stl/ |
H A D | algorithms.hpp | 31 std::size_t simd_size = batch_type::size; in transform() local 37 std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size); in transform() 47 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform() 65 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform() 85 std::size_t simd_size = batch_type::size; in transform() local 103 for (std::size_t i = align_begin_1; i < align_end; i += simd_size) \ in transform() 152 constexpr std::size_t simd_size = batch_type::size; 154 if (size < simd_size) 177 ptr += simd_size; 178 for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size) [all …]
|
/dports/devel/py-pythran/pythran-0.11.0/pythran/xsimd/stl/ |
H A D | algorithms.hpp | 31 std::size_t simd_size = batch_type::size; in transform() local 37 std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size); in transform() 47 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform() 65 for (std::size_t i = align_begin; i < align_end; i += simd_size) in transform() 85 std::size_t simd_size = batch_type::size; in transform() local 103 for (std::size_t i = align_begin_1; i < align_end; i += simd_size) \ in transform() 154 constexpr std::size_t simd_size = batch_type::size; 156 if(size < simd_size) 179 ptr += simd_size; 180 for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size) [all …]
|
/dports/science/lammps/lammps-stable_29Sep2021/lib/gpu/ |
H A D | lal_neighbor_gpu.cu | 190 int simd_size = simd_size(); in calc_neigh_list_cell() local 191 int subgroup_id_local = tid / simd_size; in calc_neigh_list_cell() 193 int lane_id = tid % simd_size; in calc_neigh_list_cell() 209 simd_size + lane_id; in calc_neigh_list_cell() 218 int end_idx = simd_size; in calc_neigh_list_cell() 231 offset += simd_size; in calc_neigh_list_cell() 236 offset = subgroup_id_local * simd_size; in calc_neigh_list_cell() 250 int remainder = num_iter % simd_size; in calc_neigh_list_cell() 286 end_idx = simd_size; in calc_neigh_list_cell() 288 cell_pos += simd_size; in calc_neigh_list_cell() [all …]
|
H A D | lal_aux_fun1.h | 235 block_reduce_add1(simd_size(), red_acc, tid, energy); \ 244 block_reduce_arr(6, simd_size(), red_acc, tid, virial); \ 289 block_reduce_add2(simd_size(), red_acc, tid, energy, e_coul); \ 300 block_reduce_arr(6, simd_size(), red_acc, tid, virial); \ 385 const int vwidth = simd_size(); \ 386 const int voffset = tid & (simd_size() - 1); \ 387 const int bnum = tid/simd_size(); \ 388 int active_subgs = BLOCK_SIZE_X/simd_size(); \ 390 if (active_subgs < BLOCK_SIZE_X/simd_size()) __syncthreads(); \ 463 const int vwidth = simd_size(); \ [all …]
|
H A D | lal_neighbor_shared.h | 79 const int simd_size) { in setup_auto_cell_size() argument 84 _simd_size = simd_size; in setup_auto_cell_size()
|
/dports/devel/ispc/ispc-1.16.1/examples/xpu/sgemm/ |
H A D | NaiveCMSgemm_genx.cpp | 62 const int simd_size = 16; local 63 vector<float, simd_size> a, c; // assuming this is 2 consequent registers 66 vector<uint, simd_size> off(init_0_15); 67 vector<svmptr_t, simd_size> offseta; 69 vector<svmptr_t, simd_size> offsetc; 72 for (; m < end; m += simd_size) { 74 if (m + simd_size > end) { 100 vector<float, simd_size> a_rem, c_rem; 101 vector<svmptr_t, simd_size> a_rem_offset, c_rem_offset;
|
/dports/graphics/opencv/opencv-4.5.3/modules/dnn/src/ocl4dnn/src/ |
H A D | ocl4dnn_conv_spatial.cpp | 348 int32_t simd_size; in setupKernelDetails() local 351 simd_size = blockN; in setupKernelDetails() 357 if (simd_size == 16) in setupKernelDetails() 394 addDef("FILTERS_IN_GROUP", ((int)alignSize(M_, simd_size) / simd_size)); in setupKernelDetails() 424 simd_size = blockK; in setupKernelDetails() 1418 int32_t simd_size = blockK; in createGEMMLikeConvKernel() local 1423 gy = alignSize(gy, simd_size); in createGEMMLikeConvKernel() 1484 blockN_ = simd_size; in createIDLFKernel() 1616 if (simd_size != 8 && simd_size != 16) in generate_idlf_tuneritems() 1650 if (tile_x > simd_size) in generate_idlf_tuneritems() [all …]
|
/dports/devel/xsimd/xsimd-7.6.0/docs/source/ |
H A D | basic_usage.rst | 54 constexpr std::size_t simd_size = xsimd::simd_type<double>::size; 55 std::size_t vec_size = size - size % simd_size; 57 for(std::size_t i = 0; i < vec_size; i += simd_size)
|
/dports/devel/intel-graphics-compiler/intel-graphics-compiler-igc-1.0.9636/IGC/AdaptorCommon/ |
H A D | ProcessFuncAttributes.cpp | 899 int simd_size = 0; in checkKernelSimdSize() local 908 if (sz != 0 && simd_size == 0) in checkKernelSimdSize() 909 simd_size = sz; in checkKernelSimdSize() 913 IGC_ASSERT_MESSAGE(simd_size == sz, "Function is called with different sub group size"); in checkKernelSimdSize() 916 if (simd_size != 0) in checkKernelSimdSize() 918 IGC_ASSERT_MESSAGE((simd_size == 8) || (simd_size == 16) || (simd_size == 32), in checkKernelSimdSize() 927 sgHandle->setSIMD_size(simd_size); in checkKernelSimdSize() 929 else if (sgHandle->getSIMD_size() != simd_size) in checkKernelSimdSize()
|
/dports/devel/intel-graphics-compiler/intel-graphics-compiler-igc-1.0.9636/IGC/Compiler/ |
H A D | SPIRMetaDataTranslation.cpp | 175 int simd_size = reqdSubGroupSize->getSIMD_Size(); in runOnModule() local 176 if (!((simd_size == 8) || (simd_size == 16) || (simd_size == 32))) in runOnModule() 181 sgHandle->setSIMD_size(simd_size); in runOnModule()
|
/dports/math/xtensor/xtensor-0.24.0/include/xtensor/ |
H A D | xassign.hpp | 354 …static constexpr bool simd_assign() { return convertible_types() && simd_size() && simd_interface(… in simd_assign() 632 constexpr size_type simd_size = simd_type::size; in run() local 635 …size_type align_begin = is_aligned ? 0 : xt_simd::get_alignment_offset(e1.data(), size, simd_size); in run() 636 size_type align_end = align_begin + ((size - align_begin) & ~(simd_size - 1)); in run() 644 tbb::parallel_for(align_begin, align_end, simd_size, [&e1, &e2](size_t i) in run() 653 for (size_type i = align_begin; i < align_end; i += simd_size) in run() 659 i += static_cast<std::ptrdiff_t>(simd_size)) in run() 668 for (size_type i = align_begin; i < align_end; i += simd_size) in run() 674 for (size_type i = align_begin; i < align_end; i += simd_size) in run() 944 std::size_t simd_size = inner_loop_size / simd_type::size; in run() local [all …]
|
/dports/devel/intel-graphics-compiler/intel-graphics-compiler-igc-1.0.9636/IGC/ZEBinWriter/zebin/source/autogen/ |
H A D | ZEInfo.hpp | 35 …required_work_group_size == other.required_work_group_size && simd_size == other.simd_size && slm_… in operator ==() 53 zeinfo_int32_t simd_size = 0; member
|
/dports/devel/radare2/radare2-5.1.1/libr/asm/arch/arm/aarch64/ |
H A D | aarch64-dis.c | 840 unsigned simd_size; in decode_limm() local 851 simd_size = 64; in decode_limm() 857 if (S >= 0x00 && S <= 0x1f) { simd_size = 32; } in decode_limm() 866 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break; in decode_limm() 867 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break; in decode_limm() 874 mask = (1ull << simd_size) - 1; in decode_limm() 876 R &= simd_size - 1; in decode_limm() 879 if (simd_size > esize * 8) in decode_limm() 883 if (S == simd_size - 1) in decode_limm() 890 imm = ((imm << (simd_size - R)) & mask) | (imm >> R); in decode_limm() [all …]
|
/dports/graphics/mesa-dri-classic/mesa-20.2.3/src/mesa/drivers/dri/i965/ |
H A D | brw_cs.h | 34 unsigned simd_size; member
|
/dports/devel/intel-graphics-compiler/intel-graphics-compiler-igc-1.0.9636/IGC/ZEBinWriter/tools/ |
H A D | Tester.cpp | 27 k1.execution_env.simd_size = 8; in getTestZEInfo() 91 k2.execution_env.simd_size = 16; in getTestZEInfo()
|
/dports/lang/gnatdroid-binutils-x86/binutils-2.27/opcodes/ |
H A D | aarch64-dis.c | 725 unsigned simd_size; in aarch64_ext_limm() local 745 simd_size = 64; in aarch64_ext_limm() 752 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break; in aarch64_ext_limm() 753 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break; in aarch64_ext_limm() 754 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break; in aarch64_ext_limm() 755 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break; in aarch64_ext_limm() 759 mask = (1ull << simd_size) - 1; in aarch64_ext_limm() 761 R &= simd_size - 1; in aarch64_ext_limm() 764 if (S == simd_size - 1) in aarch64_ext_limm() 771 imm = ((imm << (simd_size - R)) & mask) | (imm >> R); in aarch64_ext_limm() [all …]
|
/dports/lang/gnatdroid-binutils/binutils-2.27/opcodes/ |
H A D | aarch64-dis.c | 725 unsigned simd_size; in aarch64_ext_limm() local 745 simd_size = 64; in aarch64_ext_limm() 752 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break; in aarch64_ext_limm() 753 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break; in aarch64_ext_limm() 754 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break; in aarch64_ext_limm() 755 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break; in aarch64_ext_limm() 759 mask = (1ull << simd_size) - 1; in aarch64_ext_limm() 761 R &= simd_size - 1; in aarch64_ext_limm() 764 if (S == simd_size - 1) in aarch64_ext_limm() 771 imm = ((imm << (simd_size - R)) & mask) | (imm >> R); in aarch64_ext_limm() [all …]
|
/dports/devel/gdb761/gdb-7.6.1/opcodes/ |
H A D | aarch64-dis.c | 706 unsigned simd_size; in aarch64_ext_limm() local 726 simd_size = 64; in aarch64_ext_limm() 733 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break; in aarch64_ext_limm() 734 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break; in aarch64_ext_limm() 735 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break; in aarch64_ext_limm() 736 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break; in aarch64_ext_limm() 740 mask = (1ull << simd_size) - 1; in aarch64_ext_limm() 742 R &= simd_size - 1; in aarch64_ext_limm() 745 if (S == simd_size - 1) in aarch64_ext_limm() 752 imm = ((imm << (simd_size - R)) & mask) | (imm >> R); in aarch64_ext_limm() [all …]
|