/dports/graphics/opencv/opencv-4.5.3/contrib/modules/cudev/include/opencv2/cudev/warp/ |
H A D | shuffle.hpp | 71 return (uchar) __shfl((int) val, srcLane, width); in shfl() 76 return (schar) __shfl((int) val, srcLane, width); in shfl() 81 return (ushort) __shfl((int) val, srcLane, width); in shfl() 86 return (short) __shfl((int) val, srcLane, width); in shfl() 91 return __shfl(val, srcLane, width); in shfl() 96 return (uint) __shfl((int) val, srcLane, width); in shfl() 101 return __shfl(val, srcLane, width); in shfl() 109 lo = __shfl(lo, srcLane, width); in shfl() 110 hi = __shfl(hi, srcLane, width); in shfl() 119 shfl(val.x, srcLane, width) \ [all …]
|
/dports/math/faiss/faiss-1.7.1/faiss/gpu/utils/ |
H A D | WarpShuffles.cuh | 25 inline __device__ T shfl(const T val, int srcLane, int width = kWarpSize) { in shfl() argument 27 return __shfl_sync(0xffffffff, val, srcLane, width); in shfl() 29 return __shfl(val, srcLane, width); in shfl() 35 inline __device__ T* shfl(T* const val, int srcLane, int width = kWarpSize) { in shfl() argument 39 return (T*)shfl(v, srcLane, width); in shfl() 107 inline __device__ half shfl(half v, int srcLane, int width = kWarpSize) { in shfl() argument 109 vu = __shfl(vu, srcLane, width); in shfl()
|
/dports/graphics/opencv/opencv-4.5.3/modules/core/include/opencv2/core/cuda/ |
H A D | warp_shuffle.hpp | 60 __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize) in shfl() argument 63 return __shfl(val, srcLane, width); in shfl() 68 … __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize) in shfl() argument 71 return (unsigned int) __shfl((int) val, srcLane, width); in shfl() 76 __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize) in shfl() argument 82 lo = __shfl(lo, srcLane, width); in shfl() 83 hi = __shfl(hi, srcLane, width); in shfl()
|
/dports/devel/llvm90/llvm-9.0.1.src/projects/openmp/libomptarget/deviceRTLs/nvptx/src/ |
H A D | omptarget-nvptx.h | 54 #define __SHFL_SYNC(mask, var, srcLane) __shfl_sync((mask), (var), (srcLane)) argument 59 #define __SHFL_SYNC(mask, var, srcLane) __shfl((var), (srcLane)) argument
|
/dports/devel/llvm80/llvm-8.0.1.src/projects/openmp/libomptarget/deviceRTLs/nvptx/src/ |
H A D | omptarget-nvptx.h | 53 #define __SHFL_SYNC(mask, var, srcLane) __shfl_sync((mask), (var), (srcLane)) argument 58 #define __SHFL_SYNC(mask, var, srcLane) __shfl((var), (srcLane)) argument
|
/dports/www/firefox-esr/firefox-91.8.0/js/src/jit/x86-shared/ |
H A D | MacroAssembler-x86-shared-SIMD-unused.cpp | 354 unsigned srcLane; in shuffleX4() local 357 srcLane = x - 4; in shuffleX4() 360 srcLane = y - 4; in shuffleX4() 363 srcLane = z - 4; in shuffleX4() 367 srcLane = w - 4; in shuffleX4() 370 vinsertps(vinsertpsMask(srcLane, dstLane), rhs, lhs, out); in shuffleX4()
|
/dports/mail/thunderbird/thunderbird-91.8.0/js/src/jit/x86-shared/ |
H A D | MacroAssembler-x86-shared-SIMD-unused.cpp | 354 unsigned srcLane; in shuffleX4() local 357 srcLane = x - 4; in shuffleX4() 360 srcLane = y - 4; in shuffleX4() 363 srcLane = z - 4; in shuffleX4() 367 srcLane = w - 4; in shuffleX4() 370 vinsertps(vinsertpsMask(srcLane, dstLane), rhs, lhs, out); in shuffleX4()
|
/dports/devel/llvm-cheri/llvm-project-37c49ff00e3eadce5d8703fdc4497f28458c64a8/openmp/libomptarget/deviceRTLs/amdgcn/src/ |
H A D | target_impl.hip | 50 int32_t srcLane) { 53 int index = srcLane + (self & ~(width - 1));
|
/dports/devel/llvm11/llvm-11.0.1.src/projects/openmp/libomptarget/deviceRTLs/amdgcn/src/ |
H A D | target_impl.hip | 50 int32_t srcLane) { 53 int index = srcLane + (self & ~(width - 1));
|
/dports/science/lammps/lammps-stable_29Sep2021/lib/kokkos/core/src/HIP/ |
H A D | Kokkos_HIP_Vectorization.hpp | 190 __device__ inline T shfl(const T& val, const int& srcLane, const int& width) { in shfl() argument 192 Impl::in_place_shfl(rv, val, srcLane, width); in shfl()
|
/dports/devel/kokkos/kokkos-3.4.01/core/src/HIP/ |
H A D | Kokkos_HIP_Vectorization.hpp | 190 __device__ inline T shfl(const T& val, const int& srcLane, const int& width) { in shfl() argument 192 Impl::in_place_shfl(rv, val, srcLane, width); in shfl()
|
/dports/science/sparta/sparta-20Oct2021/lib/kokkos/core/src/HIP/ |
H A D | Kokkos_HIP_Vectorization.hpp | 190 __device__ inline T shfl(const T& val, const int& srcLane, const int& width) { in shfl() argument 192 Impl::in_place_shfl(rv, val, srcLane, width); in shfl()
|
/dports/science/lammps/lammps-stable_29Sep2021/lib/kokkos/core/src/Cuda/ |
H A D | Kokkos_Cuda_Vectorization.hpp | 204 __device__ inline T shfl(const T& val, const int& srcLane, const int& width, in shfl() argument 207 Impl::in_place_shfl(rv, val, srcLane, width, mask); in shfl()
|
/dports/devel/kokkos/kokkos-3.4.01/core/src/Cuda/ |
H A D | Kokkos_Cuda_Vectorization.hpp | 204 __device__ inline T shfl(const T& val, const int& srcLane, const int& width, in shfl() argument 207 Impl::in_place_shfl(rv, val, srcLane, width, mask); in shfl()
|
/dports/science/sparta/sparta-20Oct2021/lib/kokkos/core/src/Cuda/ |
H A D | Kokkos_Cuda_Vectorization.hpp | 204 __device__ inline T shfl(const T& val, const int& srcLane, const int& width, in shfl() argument 207 Impl::in_place_shfl(rv, val, srcLane, width, mask); in shfl()
|
/dports/www/chromium-legacy/chromium-88.0.4324.182/third_party/llvm/openmp/libomptarget/deviceRTLs/amdgcn/src/ |
H A D | target_impl.hip | 53 int32_t srcLane) { 56 int index = srcLane + (self & ~(width - 1));
|
/dports/devel/llvm12/llvm-project-12.0.1.src/openmp/libomptarget/deviceRTLs/amdgcn/src/ |
H A D | target_impl.hip | 54 int32_t srcLane) { 57 int index = srcLane + (self & ~(width - 1));
|
/dports/devel/wasi-compiler-rt12/llvm-project-12.0.1.src/openmp/libomptarget/deviceRTLs/amdgcn/src/ |
H A D | target_impl.hip | 54 int32_t srcLane) { 57 int index = srcLane + (self & ~(width - 1));
|
/dports/math/pdal/PDAL-2.3.0/vendor/eigen/Eigen/src/Core/arch/Default/ |
H A D | Half.h | 824 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,… 825 return static_cast<Eigen::half>(__shfl_sync(mask, static_cast<__half>(var), srcLane, width)); 842 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize)… 844 …:numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
|
/dports/misc/openmvg/openMVG-2.0/src/third_party/eigen/Eigen/src/Core/arch/Default/ |
H A D | Half.h | 878 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,… 880 return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width)); 900 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize)… 902 …:numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
|
/dports/devel/bullet/bullet3-3.21/examples/ThirdPartyLibs/Eigen/src/Core/arch/Default/ |
H A D | Half.h | 878 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,… 880 return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width)); 900 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize)… 902 …:numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
|
/dports/devel/py-bullet3/bullet3-3.21/examples/ThirdPartyLibs/Eigen/src/Core/arch/Default/ |
H A D | Half.h | 878 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane,… 880 return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width)); 900 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize)… 902 …:numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
|
/dports/lang/spidermonkey78/firefox-78.9.0/js/src/jit/x86-shared/ |
H A D | MacroAssembler-x86-shared-SIMD.cpp | 687 unsigned srcLane; in shuffleX4() local 690 srcLane = x - 4; in shuffleX4() 693 srcLane = y - 4; in shuffleX4() 696 srcLane = z - 4; in shuffleX4() 700 srcLane = w - 4; in shuffleX4() 703 vinsertps(vinsertpsMask(srcLane, dstLane), rhs, lhs, out); in shuffleX4()
|
/dports/graphics/opencv/opencv-4.5.3/contrib/modules/cudastereo/src/cuda/ |
H A D | stereosgm.cu | 41 __device__ __forceinline__ static T shfl(T var, int srcLane, int width = cudev::WARP_SIZE, uint32_t… in shfl() argument 45 return __shfl_sync(mask, var, srcLane, width); in shfl() 47 return __shfl(var, srcLane, width); in shfl() 51 srcLane %= width; in shfl() 53 T ret = smem[cudev::Warp::warpId()][srcLane + (cudev::Warp::laneId() / width) * width]; in shfl()
|
/dports/databases/mongodb36/mongodb-src-r3.6.23/src/third_party/mozjs-45/extract/js/src/jit/x86-shared/ |
H A D | CodeGenerator-x86-shared.cpp | 2703 SimdLane srcLane; in visitSimdShuffle() local 2706 srcLane = SimdLane(x - 4); in visitSimdShuffle() 2709 srcLane = SimdLane(y - 4); in visitSimdShuffle() 2712 srcLane = SimdLane(z - 4); in visitSimdShuffle() 2716 srcLane = SimdLane(w - 4); in visitSimdShuffle() 2719 masm.vinsertps(masm.vinsertpsMask(srcLane, dstLane), rhs, lhs, out); in visitSimdShuffle()
|