1 //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// \brief Provides definitions for Target specific Grid Values 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H 15 #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H 16 17 namespace llvm { 18 19 namespace omp { 20 21 /// \brief Defines various target-specific GPU grid values that must be 22 /// consistent between host RTL (plugin), device RTL, and clang. 23 /// We can change grid values for a "fat" binary so that different 24 /// passes get the correct values when generating code for a 25 /// multi-target binary. Both amdgcn and nvptx values are stored in 26 /// this file. In the future, should there be differences between GPUs 27 /// of the same architecture, then simply make a different array and 28 /// use the new array name. 29 /// 30 /// Example usage in clang: 31 /// const unsigned slot_size = 32 /// ctx.GetTargetInfo().getGridValue().GV_Warp_Size; 33 /// 34 /// Example usage in libomptarget/deviceRTLs: 35 /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 36 /// #ifdef __AMDGPU__ 37 /// #define GRIDVAL AMDGPUGridValues 38 /// #else 39 /// #define GRIDVAL NVPTXGridValues 40 /// #endif 41 /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. 42 /// llvm::omp::GRIDVAL().GV_Warp_Size 43 /// 44 /// Example usage in libomptarget hsa plugin: 45 /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 46 /// #define GRIDVAL AMDGPUGridValues 47 /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. 48 /// llvm::omp::GRIDVAL().GV_Warp_Size 49 /// 50 /// Example usage in libomptarget cuda plugin: 51 /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 52 /// #define GRIDVAL NVPTXGridValues 53 /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. 54 /// llvm::omp::GRIDVAL().GV_Warp_Size 55 /// 56 57 struct GV { 58 /// The size reserved for data in a shared memory slot. 59 const unsigned GV_Slot_Size; 60 /// The default value of maximum number of threads in a worker warp. 61 const unsigned GV_Warp_Size; 62 63 constexpr unsigned warpSlotSize() const { 64 return GV_Warp_Size * GV_Slot_Size; 65 } 66 67 /// the maximum number of teams. 68 const unsigned GV_Max_Teams; 69 // An alternative to the heavy data sharing infrastructure that uses global 70 // memory is one that uses device __shared__ memory. The amount of such space 71 // (in bytes) reserved by the OpenMP runtime is noted here. 72 const unsigned GV_SimpleBufferSize; 73 // The absolute maximum team size for a working group 74 const unsigned GV_Max_WG_Size; 75 // The default maximum team size for a working group 76 const unsigned GV_Default_WG_Size; 77 78 constexpr unsigned maxWarpNumber() const { 79 return GV_Max_WG_Size / GV_Warp_Size; 80 } 81 }; 82 83 /// For AMDGPU GPUs 84 static constexpr GV AMDGPUGridValues64 = { 85 256, // GV_Slot_Size 86 64, // GV_Warp_Size 87 128, // GV_Max_Teams 88 896, // GV_SimpleBufferSize 89 1024, // GV_Max_WG_Size, 90 256, // GV_Default_WG_Size 91 }; 92 93 static constexpr GV AMDGPUGridValues32 = { 94 256, // GV_Slot_Size 95 32, // GV_Warp_Size 96 128, // GV_Max_Teams 97 896, // GV_SimpleBufferSize 98 1024, // GV_Max_WG_Size, 99 256, // GV_Default_WG_Size 100 }; 101 102 template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() { 103 static_assert(wavesize == 32 || wavesize == 64, ""); 104 return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64; 105 } 106 107 /// For Nvidia GPUs 108 static constexpr GV NVPTXGridValues = { 109 256, // GV_Slot_Size 110 32, // GV_Warp_Size 111 1024, // GV_Max_Teams 112 896, // GV_SimpleBufferSize 113 1024, // GV_Max_WG_Size 114 128, // GV_Default_WG_Size 115 }; 116 117 } // namespace omp 118 } // namespace llvm 119 120 #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H 121