1 //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// \brief Provides definitions for Target specific Grid Values
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
15 #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
16 
17 namespace llvm {
18 
19 namespace omp {
20 
21 /// \brief Defines various target-specific GPU grid values that must be
22 ///        consistent between host RTL (plugin), device RTL, and clang.
23 ///        We can change grid values for a "fat" binary so that different
24 ///        passes get the correct values when generating code for a
25 ///        multi-target binary. Both amdgcn and nvptx values are stored in
26 ///        this file. In the future, should there be differences between GPUs
27 ///        of the same architecture, then simply make a different array and
28 ///        use the new array name.
29 ///
30 /// Example usage in clang:
31 ///   const unsigned slot_size =
32 ///   ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
33 ///
34 /// Example usage in libomptarget/deviceRTLs:
35 ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
36 ///   #ifdef __AMDGPU__
37 ///     #define GRIDVAL AMDGPUGridValues
38 ///   #else
39 ///     #define GRIDVAL NVPTXGridValues
40 ///   #endif
41 ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
42 ///   llvm::omp::GRIDVAL().GV_Warp_Size
43 ///
44 /// Example usage in libomptarget hsa plugin:
45 ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
46 ///   #define GRIDVAL AMDGPUGridValues
47 ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
48 ///   llvm::omp::GRIDVAL().GV_Warp_Size
49 ///
50 /// Example usage in libomptarget cuda plugin:
51 ///    #include "llvm/Frontend/OpenMP/OMPGridValues.h"
52 ///    #define GRIDVAL NVPTXGridValues
53 ///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
54 ///    llvm::omp::GRIDVAL().GV_Warp_Size
55 ///
56 
57 struct GV {
58   /// The size reserved for data in a shared memory slot.
59   unsigned GV_Slot_Size;
60   /// The default value of maximum number of threads in a worker warp.
61   unsigned GV_Warp_Size;
62 
63   constexpr unsigned warpSlotSize() const {
64     return GV_Warp_Size * GV_Slot_Size;
65   }
66 
67   /// the maximum number of teams.
68   unsigned GV_Max_Teams;
69   // The default number of teams in the absence of any other information.
70   unsigned GV_Default_Num_Teams;
71 
72   // An alternative to the heavy data sharing infrastructure that uses global
73   // memory is one that uses device __shared__ memory.  The amount of such space
74   // (in bytes) reserved by the OpenMP runtime is noted here.
75   unsigned GV_SimpleBufferSize;
76   // The absolute maximum team size for a working group
77   unsigned GV_Max_WG_Size;
78   // The default maximum team size for a working group
79   unsigned GV_Default_WG_Size;
80 
81   constexpr unsigned maxWarpNumber() const {
82     return GV_Max_WG_Size / GV_Warp_Size;
83   }
84 };
85 
86 /// For AMDGPU GPUs
87 static constexpr GV AMDGPUGridValues64 = {
88     256,       // GV_Slot_Size
89     64,        // GV_Warp_Size
90     (1 << 16), // GV_Max_Teams
91     440,       // GV_Default_Num_Teams
92     896,       // GV_SimpleBufferSize
93     1024,      // GV_Max_WG_Size,
94     256,       // GV_Default_WG_Size
95 };
96 
97 static constexpr GV AMDGPUGridValues32 = {
98     256,       // GV_Slot_Size
99     32,        // GV_Warp_Size
100     (1 << 16), // GV_Max_Teams
101     440,       // GV_Default_Num_Teams
102     896,       // GV_SimpleBufferSize
103     1024,      // GV_Max_WG_Size,
104     256,       // GV_Default_WG_Size
105 };
106 
107 template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
108   static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
109   return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
110 }
111 
112 /// For Nvidia GPUs
113 static constexpr GV NVPTXGridValues = {
114     256,       // GV_Slot_Size
115     32,        // GV_Warp_Size
116     (1 << 16), // GV_Max_Teams
117     3200,      // GV_Default_Num_Teams
118     896,       // GV_SimpleBufferSize
119     1024,      // GV_Max_WG_Size
120     128,       // GV_Default_WG_Size
121 };
122 
123 } // namespace omp
124 } // namespace llvm
125 
126 #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
127