/dports/math/faiss/faiss-1.7.1/faiss/gpu/utils/ |
H A D | BlockSelectKernel.cuh | 21 int ThreadsPerBlock> 29 constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; in blockSelect() 41 ThreadsPerBlock> in blockSelect() 53 for (; i < limit; i += ThreadsPerBlock) { in blockSelect() 55 inStart += ThreadsPerBlock; in blockSelect() 77 int ThreadsPerBlock> 86 constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; in blockSelectPair() 98 ThreadsPerBlock> in blockSelectPair() 111 for (; i < limit; i += ThreadsPerBlock) { in blockSelectPair() 113 inKStart += ThreadsPerBlock; in blockSelectPair() [all …]
|
H A D | Select.cuh | 146 int ThreadsPerBlock> 148 static constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; 165 utils::isPowerOf2(ThreadsPerBlock), in BlockSelect() 344 int ThreadsPerBlock> 345 struct BlockSelect<K, V, Dir, Comp, 1, NumThreadQ, ThreadsPerBlock> { 346 static constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; 438 int ThreadsPerBlock> 449 utils::isPowerOf2(ThreadsPerBlock), in WarpSelect() 595 int ThreadsPerBlock> 596 struct WarpSelect<K, V, Dir, Comp, 1, NumThreadQ, ThreadsPerBlock> { [all …]
|
H A D | WarpSelectKernel.cuh | 21 int ThreadsPerBlock> 29 constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; in warpSelect() 38 ThreadsPerBlock> in warpSelect()
|
/dports/devel/llvm80/llvm-8.0.1.src/projects/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 103 std::vector<int> ThreadsPerBlock; member in RTLDeviceInfoTy 208 ThreadsPerBlock.resize(NumberOfDevices); in RTLDeviceInfoTy() 311 DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::DefaultNumThreads; in __tgt_rtl_init_device() 313 DeviceInfo.ThreadsPerBlock[device_id] = maxBlockDimX; in __tgt_rtl_init_device() 316 DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::HardThreadLimit; in __tgt_rtl_init_device() 365 DeviceInfo.ThreadsPerBlock[device_id]) { in __tgt_rtl_init_device() 366 DeviceInfo.NumTeams[device_id] = DeviceInfo.ThreadsPerBlock[device_id]; in __tgt_rtl_init_device() 368 DeviceInfo.ThreadsPerBlock[device_id]); in __tgt_rtl_init_device() 669 if (cudaThreadsPerBlock > DeviceInfo.ThreadsPerBlock[device_id]) { in __tgt_rtl_run_target_team_region() 670 cudaThreadsPerBlock = DeviceInfo.ThreadsPerBlock[device_id]; in __tgt_rtl_run_target_team_region() [all …]
|
/dports/devel/llvm10/llvm-10.0.1.src/projects/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 100 std::vector<int> ThreadsPerBlock; member in RTLDeviceInfoTy 208 ThreadsPerBlock.resize(NumberOfDevices); in RTLDeviceInfoTy() 320 DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::DefaultNumThreads; in __tgt_rtl_init_device() 322 DeviceInfo.ThreadsPerBlock[device_id] = maxBlockDimX; in __tgt_rtl_init_device() 325 DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::HardThreadLimit; in __tgt_rtl_init_device() 374 DeviceInfo.ThreadsPerBlock[device_id]) { in __tgt_rtl_init_device() 375 DeviceInfo.NumTeams[device_id] = DeviceInfo.ThreadsPerBlock[device_id]; in __tgt_rtl_init_device() 377 DeviceInfo.ThreadsPerBlock[device_id]); in __tgt_rtl_init_device() 698 if (cudaThreadsPerBlock > DeviceInfo.ThreadsPerBlock[device_id]) { in __tgt_rtl_run_target_team_region() 699 cudaThreadsPerBlock = DeviceInfo.ThreadsPerBlock[device_id]; in __tgt_rtl_run_target_team_region() [all …]
|
/dports/devel/tinygo/tinygo-0.14.1/llvm-project/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 100 std::vector<int> ThreadsPerBlock; member in RTLDeviceInfoTy 208 ThreadsPerBlock.resize(NumberOfDevices); in RTLDeviceInfoTy() 320 DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::DefaultNumThreads; in __tgt_rtl_init_device() 322 DeviceInfo.ThreadsPerBlock[device_id] = maxBlockDimX; in __tgt_rtl_init_device() 325 DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::HardThreadLimit; in __tgt_rtl_init_device() 374 DeviceInfo.ThreadsPerBlock[device_id]) { in __tgt_rtl_init_device() 375 DeviceInfo.NumTeams[device_id] = DeviceInfo.ThreadsPerBlock[device_id]; in __tgt_rtl_init_device() 377 DeviceInfo.ThreadsPerBlock[device_id]); in __tgt_rtl_init_device() 698 if (cudaThreadsPerBlock > DeviceInfo.ThreadsPerBlock[device_id]) { in __tgt_rtl_run_target_team_region() 699 cudaThreadsPerBlock = DeviceInfo.ThreadsPerBlock[device_id]; in __tgt_rtl_run_target_team_region() [all …]
|
/dports/devel/llvm90/llvm-9.0.1.src/projects/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 102 std::vector<int> ThreadsPerBlock; member in RTLDeviceInfoTy 210 ThreadsPerBlock.resize(NumberOfDevices); in RTLDeviceInfoTy() 322 DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::DefaultNumThreads; in __tgt_rtl_init_device() 324 DeviceInfo.ThreadsPerBlock[device_id] = maxBlockDimX; in __tgt_rtl_init_device() 327 DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::HardThreadLimit; in __tgt_rtl_init_device() 376 DeviceInfo.ThreadsPerBlock[device_id]) { in __tgt_rtl_init_device() 377 DeviceInfo.NumTeams[device_id] = DeviceInfo.ThreadsPerBlock[device_id]; in __tgt_rtl_init_device() 379 DeviceInfo.ThreadsPerBlock[device_id]); in __tgt_rtl_init_device() 700 if (cudaThreadsPerBlock > DeviceInfo.ThreadsPerBlock[device_id]) { in __tgt_rtl_run_target_team_region() 701 cudaThreadsPerBlock = DeviceInfo.ThreadsPerBlock[device_id]; in __tgt_rtl_run_target_team_region() [all …]
|
/dports/math/faiss/faiss-1.7.1/faiss/gpu/impl/ |
H A D | IVFUtilsSelect1.cu | 24 template <int ThreadsPerBlock, int NumWarpQ, int NumThreadQ, bool Dir> 32 constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; in pass1SelectLists() 45 ThreadsPerBlock> in pass1SelectLists()
|
H A D | IVFUtilsSelect2.cu | 52 template <int ThreadsPerBlock, int NumWarpQ, int NumThreadQ, bool Dir> 63 constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; in pass2SelectLists() 76 ThreadsPerBlock> in pass2SelectLists()
|
H A D | IVFInterleaved.cu | 18 template <int ThreadsPerBlock, int NumWarpQ, int NumThreadQ> 31 constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; in ivfInterleavedScan2() 46 ThreadsPerBlock> in ivfInterleavedScan2()
|
H A D | L2Select.cu | 128 template <typename T, int NumWarpQ, int NumThreadQ, int ThreadsPerBlock> 137 constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; in l2SelectMinK() 149 ThreadsPerBlock> in l2SelectMinK()
|
H A D | IVFInterleaved.cuh | 36 int ThreadsPerBlock, 54 constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; in ivfInterleavedScan() 91 ThreadsPerBlock> in ivfInterleavedScan()
|
/dports/devel/wasi-compiler-rt13/llvm-project-13.0.1.src/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 143 int ThreadsPerBlock = 0; member 613 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice() 616 DeviceData[DeviceId].ThreadsPerBlock > EnvTeamThreadLimit) { in initDevice() 619 DeviceData[DeviceId].ThreadsPerBlock, EnvTeamThreadLimit); in initDevice() 620 DeviceData[DeviceId].ThreadsPerBlock = EnvTeamThreadLimit; in initDevice() 700 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice() 702 DeviceData[DeviceId].ThreadsPerBlock); in initDevice() 703 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].ThreadsPerBlock; in initDevice() 1048 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion() 1050 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion() [all …]
|
/dports/devel/wasi-libcxx/llvm-project-13.0.1.src/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 143 int ThreadsPerBlock = 0; member 613 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice() 616 DeviceData[DeviceId].ThreadsPerBlock > EnvTeamThreadLimit) { in initDevice() 619 DeviceData[DeviceId].ThreadsPerBlock, EnvTeamThreadLimit); in initDevice() 620 DeviceData[DeviceId].ThreadsPerBlock = EnvTeamThreadLimit; in initDevice() 700 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice() 702 DeviceData[DeviceId].ThreadsPerBlock); in initDevice() 703 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].ThreadsPerBlock; in initDevice() 1048 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion() 1050 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion() [all …]
|
/dports/devel/llvm13/llvm-project-13.0.1.src/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 143 int ThreadsPerBlock = 0; member 613 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice() 616 DeviceData[DeviceId].ThreadsPerBlock > EnvTeamThreadLimit) { in initDevice() 619 DeviceData[DeviceId].ThreadsPerBlock, EnvTeamThreadLimit); in initDevice() 620 DeviceData[DeviceId].ThreadsPerBlock = EnvTeamThreadLimit; in initDevice() 700 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice() 702 DeviceData[DeviceId].ThreadsPerBlock); in initDevice() 703 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].ThreadsPerBlock; in initDevice() 1048 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion() 1050 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion() [all …]
|
/dports/devel/llvm-devel/llvm-project-f05c95f10fc1d8171071735af8ad3a9e87633120/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 182 int ThreadsPerBlock = 0; member 658 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::DefaultNumThreads; in initDevice() 661 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice() 664 DeviceData[DeviceId].ThreadsPerBlock > EnvTeamThreadLimit) { in initDevice() 667 DeviceData[DeviceId].ThreadsPerBlock, EnvTeamThreadLimit); in initDevice() 668 DeviceData[DeviceId].ThreadsPerBlock = EnvTeamThreadLimit; in initDevice() 748 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice() 750 DeviceData[DeviceId].ThreadsPerBlock); in initDevice() 1099 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion() 1101 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion() [all …]
|
/dports/devel/llvm-cheri/llvm-project-37c49ff00e3eadce5d8703fdc4497f28458c64a8/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 124 int ThreadsPerBlock = 0; member 478 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::DefaultNumThreads; in initDevice() 481 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice() 486 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit; in initDevice() 509 DeviceData[DeviceId].BlocksPerGrid, DeviceData[DeviceId].ThreadsPerBlock, in initDevice() 534 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice() 536 DeviceData[DeviceId].ThreadsPerBlock); in initDevice() 537 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].ThreadsPerBlock; in initDevice() 883 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion() 885 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion() [all …]
|
/dports/devel/llvm11/llvm-11.0.1.src/projects/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 124 int ThreadsPerBlock = 0; member 478 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::DefaultNumThreads; in initDevice() 481 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice() 486 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit; in initDevice() 509 DeviceData[DeviceId].BlocksPerGrid, DeviceData[DeviceId].ThreadsPerBlock, in initDevice() 534 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice() 536 DeviceData[DeviceId].ThreadsPerBlock); in initDevice() 537 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].ThreadsPerBlock; in initDevice() 883 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion() 885 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion() [all …]
|
/dports/www/chromium-legacy/chromium-88.0.4324.182/third_party/llvm/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 123 int ThreadsPerBlock = 0; member 474 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::DefaultNumThreads; in initDevice() 477 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice() 482 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit; in initDevice() 508 DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize); in initDevice() 532 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice() 534 DeviceData[DeviceId].ThreadsPerBlock); in initDevice() 535 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].ThreadsPerBlock; in initDevice() 880 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion() 882 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion() [all …]
|
/dports/devel/llvm12/llvm-project-12.0.1.src/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 136 int ThreadsPerBlock = 0; member 558 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::DefaultNumThreads; in initDevice() 561 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice() 566 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit; in initDevice() 592 DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize); in initDevice() 616 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice() 618 DeviceData[DeviceId].ThreadsPerBlock); in initDevice() 619 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].ThreadsPerBlock; in initDevice() 952 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion() 954 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion() [all …]
|
/dports/devel/wasi-compiler-rt12/llvm-project-12.0.1.src/openmp/libomptarget/plugins/cuda/src/ |
H A D | rtl.cpp | 136 int ThreadsPerBlock = 0; member 558 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::DefaultNumThreads; in initDevice() 561 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice() 566 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit; in initDevice() 592 DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize); in initDevice() 616 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice() 618 DeviceData[DeviceId].ThreadsPerBlock); in initDevice() 619 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].ThreadsPerBlock; in initDevice() 952 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion() 954 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion() [all …]
|