1 //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// \brief Provides definitions for Target specific Grid Values 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H 15 #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H 16 17 namespace llvm { 18 19 namespace omp { 20 21 /// \brief Defines various target-specific GPU grid values that must be 22 /// consistent between host RTL (plugin), device RTL, and clang. 23 /// We can change grid values for a "fat" binary so that different 24 /// passes get the correct values when generating code for a 25 /// multi-target binary. Both amdgcn and nvptx values are stored in 26 /// this file. In the future, should there be differences between GPUs 27 /// of the same architecture, then simply make a different array and 28 /// use the new array name. 29 /// 30 /// Example usage in clang: 31 /// const unsigned slot_size = 32 /// ctx.GetTargetInfo().getGridValue().GV_Warp_Size; 33 /// 34 /// Example usage in libomptarget/deviceRTLs: 35 /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 36 /// #ifdef __AMDGPU__ 37 /// #define GRIDVAL AMDGPUGridValues 38 /// #else 39 /// #define GRIDVAL NVPTXGridValues 40 /// #endif 41 /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. 42 /// llvm::omp::GRIDVAL().GV_Warp_Size 43 /// 44 /// Example usage in libomptarget hsa plugin: 45 /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 46 /// #define GRIDVAL AMDGPUGridValues 47 /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. 48 /// llvm::omp::GRIDVAL().GV_Warp_Size 49 /// 50 /// Example usage in libomptarget cuda plugin: 51 /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 52 /// #define GRIDVAL NVPTXGridValues 53 /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. 54 /// llvm::omp::GRIDVAL().GV_Warp_Size 55 /// 56 57 struct GV { 58 /// The size reserved for data in a shared memory slot. 59 unsigned GV_Slot_Size; 60 /// The default value of maximum number of threads in a worker warp. 61 unsigned GV_Warp_Size; 62 63 constexpr unsigned warpSlotSize() const { 64 return GV_Warp_Size * GV_Slot_Size; 65 } 66 67 /// the maximum number of teams. 68 unsigned GV_Max_Teams; 69 // The default number of teams in the absence of any other information. 70 unsigned GV_Default_Num_Teams; 71 72 // An alternative to the heavy data sharing infrastructure that uses global 73 // memory is one that uses device __shared__ memory. The amount of such space 74 // (in bytes) reserved by the OpenMP runtime is noted here. 75 unsigned GV_SimpleBufferSize; 76 // The absolute maximum team size for a working group 77 unsigned GV_Max_WG_Size; 78 // The default maximum team size for a working group 79 unsigned GV_Default_WG_Size; 80 81 constexpr unsigned maxWarpNumber() const { 82 return GV_Max_WG_Size / GV_Warp_Size; 83 } 84 }; 85 86 /// For AMDGPU GPUs 87 static constexpr GV AMDGPUGridValues64 = { 88 256, // GV_Slot_Size 89 64, // GV_Warp_Size 90 (1 << 16), // GV_Max_Teams 91 440, // GV_Default_Num_Teams 92 896, // GV_SimpleBufferSize 93 1024, // GV_Max_WG_Size, 94 256, // GV_Default_WG_Size 95 }; 96 97 static constexpr GV AMDGPUGridValues32 = { 98 256, // GV_Slot_Size 99 32, // GV_Warp_Size 100 (1 << 16), // GV_Max_Teams 101 440, // GV_Default_Num_Teams 102 896, // GV_SimpleBufferSize 103 1024, // GV_Max_WG_Size, 104 256, // GV_Default_WG_Size 105 }; 106 107 template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() { 108 static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize"); 109 return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64; 110 } 111 112 /// For Nvidia GPUs 113 static constexpr GV NVPTXGridValues = { 114 256, // GV_Slot_Size 115 32, // GV_Warp_Size 116 (1 << 16), // GV_Max_Teams 117 3200, // GV_Default_Num_Teams 118 896, // GV_SimpleBufferSize 119 1024, // GV_Max_WG_Size 120 128, // GV_Default_WG_Size 121 }; 122 123 } // namespace omp 124 } // namespace llvm 125 126 #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H 127