Home
last modified time | relevance | path

Searched refs:srcLane (Results 1 – 25 of 33) sorted by relevance

12

/dports/graphics/opencv/opencv-4.5.3/contrib/modules/cudev/include/opencv2/cudev/warp/
H A Dshuffle.hpp71 return (uchar) __shfl((int) val, srcLane, width); in shfl()
76 return (schar) __shfl((int) val, srcLane, width); in shfl()
81 return (ushort) __shfl((int) val, srcLane, width); in shfl()
86 return (short) __shfl((int) val, srcLane, width); in shfl()
91 return __shfl(val, srcLane, width); in shfl()
96 return (uint) __shfl((int) val, srcLane, width); in shfl()
101 return __shfl(val, srcLane, width); in shfl()
109 lo = __shfl(lo, srcLane, width); in shfl()
110 hi = __shfl(hi, srcLane, width); in shfl()
119 shfl(val.x, srcLane, width) \
[all …]
/dports/math/faiss/faiss-1.7.1/faiss/gpu/utils/
H A DWarpShuffles.cuh25 inline __device__ T shfl(const T val, int srcLane, int width = kWarpSize) { in shfl() argument
27 return __shfl_sync(0xffffffff, val, srcLane, width); in shfl()
29 return __shfl(val, srcLane, width); in shfl()
35 inline __device__ T* shfl(T* const val, int srcLane, int width = kWarpSize) { in shfl() argument
39 return (T*)shfl(v, srcLane, width); in shfl()
107 inline __device__ half shfl(half v, int srcLane, int width = kWarpSize) { in shfl() argument
109 vu = __shfl(vu, srcLane, width); in shfl()
/dports/graphics/opencv/opencv-4.5.3/modules/core/include/opencv2/core/cuda/
H A Dwarp_shuffle.hpp60 __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize) in shfl() argument
63 return __shfl(val, srcLane, width); in shfl()
68 … __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize) in shfl() argument
71 return (unsigned int) __shfl((int) val, srcLane, width); in shfl()
76 __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize) in shfl() argument
82 lo = __shfl(lo, srcLane, width); in shfl()
83 hi = __shfl(hi, srcLane, width); in shfl()
/dports/devel/llvm90/llvm-9.0.1.src/projects/openmp/libomptarget/deviceRTLs/nvptx/src/
H A Domptarget-nvptx.h54 #define __SHFL_SYNC(mask, var, srcLane) __shfl_sync((mask), (var), (srcLane)) argument
59 #define __SHFL_SYNC(mask, var, srcLane) __shfl((var), (srcLane)) argument
/dports/devel/llvm80/llvm-8.0.1.src/projects/openmp/libomptarget/deviceRTLs/nvptx/src/
H A Domptarget-nvptx.h53 #define __SHFL_SYNC(mask, var, srcLane) __shfl_sync((mask), (var), (srcLane)) argument
58 #define __SHFL_SYNC(mask, var, srcLane) __shfl((var), (srcLane)) argument
/dports/www/firefox-esr/firefox-91.8.0/js/src/jit/x86-shared/
H A DMacroAssembler-x86-shared-SIMD-unused.cpp354 unsigned srcLane; in shuffleX4() local
357 srcLane = x - 4; in shuffleX4()
360 srcLane = y - 4; in shuffleX4()
363 srcLane = z - 4; in shuffleX4()
367 srcLane = w - 4; in shuffleX4()
370 vinsertps(vinsertpsMask(srcLane, dstLane), rhs, lhs, out); in shuffleX4()
/dports/mail/thunderbird/thunderbird-91.8.0/js/src/jit/x86-shared/
H A DMacroAssembler-x86-shared-SIMD-unused.cpp354 unsigned srcLane; in shuffleX4() local
357 srcLane = x - 4; in shuffleX4()
360 srcLane = y - 4; in shuffleX4()
363 srcLane = z - 4; in shuffleX4()
367 srcLane = w - 4; in shuffleX4()
370 vinsertps(vinsertpsMask(srcLane, dstLane), rhs, lhs, out); in shuffleX4()
/dports/devel/llvm-cheri/llvm-project-37c49ff00e3eadce5d8703fdc4497f28458c64a8/openmp/libomptarget/deviceRTLs/amdgcn/src/
H A Dtarget_impl.hip50 int32_t srcLane) {
53 int index = srcLane + (self & ~(width - 1));
/dports/devel/llvm11/llvm-11.0.1.src/projects/openmp/libomptarget/deviceRTLs/amdgcn/src/
H A Dtarget_impl.hip50 int32_t srcLane) {
53 int index = srcLane + (self & ~(width - 1));
/dports/science/lammps/lammps-stable_29Sep2021/lib/kokkos/core/src/HIP/
H A DKokkos_HIP_Vectorization.hpp190 __device__ inline T shfl(const T& val, const int& srcLane, const int& width) { in shfl() argument
192 Impl::in_place_shfl(rv, val, srcLane, width); in shfl()
/dports/devel/kokkos/kokkos-3.4.01/core/src/HIP/
H A DKokkos_HIP_Vectorization.hpp190 __device__ inline T shfl(const T& val, const int& srcLane, const int& width) { in shfl() argument
192 Impl::in_place_shfl(rv, val, srcLane, width); in shfl()
/dports/science/sparta/sparta-20Oct2021/lib/kokkos/core/src/HIP/
H A DKokkos_HIP_Vectorization.hpp190 __device__ inline T shfl(const T& val, const int& srcLane, const int& width) { in shfl() argument
192 Impl::in_place_shfl(rv, val, srcLane, width); in shfl()
/dports/science/lammps/lammps-stable_29Sep2021/lib/kokkos/core/src/Cuda/
H A DKokkos_Cuda_Vectorization.hpp204 __device__ inline T shfl(const T& val, const int& srcLane, const int& width, in shfl() argument
207 Impl::in_place_shfl(rv, val, srcLane, width, mask); in shfl()
/dports/devel/kokkos/kokkos-3.4.01/core/src/Cuda/
H A DKokkos_Cuda_Vectorization.hpp204 __device__ inline T shfl(const T& val, const int& srcLane, const int& width, in shfl() argument
207 Impl::in_place_shfl(rv, val, srcLane, width, mask); in shfl()
/dports/science/sparta/sparta-20Oct2021/lib/kokkos/core/src/Cuda/
H A DKokkos_Cuda_Vectorization.hpp204 __device__ inline T shfl(const T& val, const int& srcLane, const int& width, in shfl() argument
207 Impl::in_place_shfl(rv, val, srcLane, width, mask); in shfl()
/dports/www/chromium-legacy/chromium-88.0.4324.182/third_party/llvm/openmp/libomptarget/deviceRTLs/amdgcn/src/
H A Dtarget_impl.hip53 int32_t srcLane) {
56 int index = srcLane + (self & ~(width - 1));
/dports/devel/llvm12/llvm-project-12.0.1.src/openmp/libomptarget/deviceRTLs/amdgcn/src/
H A Dtarget_impl.hip54 int32_t srcLane) {
57 int index = srcLane + (self & ~(width - 1));
/dports/devel/wasi-compiler-rt12/llvm-project-12.0.1.src/openmp/libomptarget/deviceRTLs/amdgcn/src/
H A Dtarget_impl.hip54 int32_t srcLane) {
57 int index = srcLane + (self & ~(width - 1));
/dports/math/pdal/PDAL-2.3.0/vendor/eigen/Eigen/src/Core/arch/Default/
H A DHalf.h824 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,…
825 return static_cast<Eigen::half>(__shfl_sync(mask, static_cast<__half>(var), srcLane, width));
842 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize)…
844 …:numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
/dports/misc/openmvg/openMVG-2.0/src/third_party/eigen/Eigen/src/Core/arch/Default/
H A DHalf.h878 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,…
880 return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width));
900 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize)…
902 …:numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
/dports/devel/bullet/bullet3-3.21/examples/ThirdPartyLibs/Eigen/src/Core/arch/Default/
H A DHalf.h878 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,…
880 return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width));
900 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize)…
902 …:numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
/dports/devel/py-bullet3/bullet3-3.21/examples/ThirdPartyLibs/Eigen/src/Core/arch/Default/
H A DHalf.h878 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,…
880 return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width));
900 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize)…
902 …:numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
/dports/lang/spidermonkey78/firefox-78.9.0/js/src/jit/x86-shared/
H A DMacroAssembler-x86-shared-SIMD.cpp687 unsigned srcLane; in shuffleX4() local
690 srcLane = x - 4; in shuffleX4()
693 srcLane = y - 4; in shuffleX4()
696 srcLane = z - 4; in shuffleX4()
700 srcLane = w - 4; in shuffleX4()
703 vinsertps(vinsertpsMask(srcLane, dstLane), rhs, lhs, out); in shuffleX4()
/dports/graphics/opencv/opencv-4.5.3/contrib/modules/cudastereo/src/cuda/
H A Dstereosgm.cu41 __device__ __forceinline__ static T shfl(T var, int srcLane, int width = cudev::WARP_SIZE, uint32_t… in shfl() argument
45 return __shfl_sync(mask, var, srcLane, width); in shfl()
47 return __shfl(var, srcLane, width); in shfl()
51 srcLane %= width; in shfl()
53 T ret = smem[cudev::Warp::warpId()][srcLane + (cudev::Warp::laneId() / width) * width]; in shfl()
/dports/databases/mongodb36/mongodb-src-r3.6.23/src/third_party/mozjs-45/extract/js/src/jit/x86-shared/
H A DCodeGenerator-x86-shared.cpp2703 SimdLane srcLane; in visitSimdShuffle() local
2706 srcLane = SimdLane(x - 4); in visitSimdShuffle()
2709 srcLane = SimdLane(y - 4); in visitSimdShuffle()
2712 srcLane = SimdLane(z - 4); in visitSimdShuffle()
2716 srcLane = SimdLane(w - 4); in visitSimdShuffle()
2719 masm.vinsertps(masm.vinsertpsMask(srcLane, dstLane), rhs, lhs, out); in visitSimdShuffle()

12