1 //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Provides definitions for Target specific Grid Values
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_OPENMP_GRIDVALUES_H
16 #define LLVM_OPENMP_GRIDVALUES_H
17 
18 namespace llvm {
19 
20 namespace omp {
21 
22 /// \brief Defines various target-specific GPU grid values that must be
23 ///        consistent between host RTL (plugin), device RTL, and clang.
24 ///        We can change grid values for a "fat" binary so that different
25 ///        passes get the correct values when generating code for a
26 ///        multi-target binary. Both amdgcn and nvptx values are stored in
27 ///        this file. In the future, should there be differences between GPUs
28 ///        of the same architecture, then simply make a different array and
29 ///        use the new array name.
30 ///
31 /// Example usage in clang:
32 ///   const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size);
33 ///
34 /// Example usage in libomptarget/deviceRTLs:
35 ///   #include "OMPGridValues.h"
36 ///   #ifdef __AMDGPU__
37 ///     #define GRIDVAL AMDGPUGpuGridValues
38 ///   #else
39 ///     #define GRIDVAL NVPTXGpuGridValues
40 ///   #endif
41 ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
42 ///   GRIDVAL[GV_Warp_Size]
43 ///
44 /// Example usage in libomptarget hsa plugin:
45 ///   #include "OMPGridValues.h"
46 ///   #define GRIDVAL AMDGPUGpuGridValues
47 ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
48 ///   GRIDVAL[GV_Warp_Size]
49 ///
/// Example usage in libomptarget cuda plugin:
///   #include "OMPGridValues.h"
///   #define GRIDVAL NVPTXGpuGridValues
///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
///   GRIDVAL[GV_Warp_Size]
55 ///
/// Indexes into the per-target grid-value arrays below
/// (AMDGPUGpuGridValues / NVPTXGpuGridValues). Keep the enumerator order in
/// sync with the element order of those arrays.
enum GVIDX {
  /// The maximum number of workers in a kernel.
  /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
  GV_Threads,
  /// The size reserved for data in a shared memory slot.
  GV_Slot_Size,
  /// The default value of maximum number of threads in a worker warp.
  GV_Warp_Size,
  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
  /// for NVPTX.
  GV_Warp_Size_32,
  /// The number of bits required to represent the max number of threads in warp
  GV_Warp_Size_Log2,
  /// GV_Warp_Size * GV_Slot_Size,
  GV_Warp_Slot_Size,
  /// The maximum number of teams.
  GV_Max_Teams,
  /// Global Memory Alignment
  GV_Mem_Align,
  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
  GV_Warp_Size_Log2_Mask,
  /// An alternative to the heavy data sharing infrastructure that uses global
  /// memory is one that uses device __shared__ memory. The amount of such
  /// space (in bytes) reserved by the OpenMP runtime is noted here.
  GV_SimpleBufferSize,
  /// The absolute maximum team size for a working group.
  GV_Max_WG_Size,
  /// The default maximum team size for a working group.
  GV_Default_WG_Size,
  /// This is GV_Max_WG_Size / GV_Warp_Size. 32 for NVPTX and 16 for AMDGCN.
  GV_Max_Warp_Number,
  /// Long variant of the warp-size log2 mask,
  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)).
  /// NOTE(review): the arrays below store the same value here as for
  /// GV_Warp_Size_Log2_Mask — presumably a wider-typed duplicate; confirm
  /// against the device RTL users before consolidating.
  GV_Warp_Size_Log2_MaskL
};
91 
/// Grid values for AMD GPUs (amdgcn targets).
/// Element order must match the GVIDX enumerators.
static constexpr unsigned AMDGPUGpuGridValues[] = {
    448,   // GV_Threads
    256,   // GV_Slot_Size
    64,    // GV_Warp_Size
    32,    // GV_Warp_Size_32
    6,     // GV_Warp_Size_Log2 (log2 of the 64-lane warp)
    16384, // GV_Warp_Slot_Size = 64 * 256
    128,   // GV_Max_Teams
    256,   // GV_Mem_Align
    63,    // GV_Warp_Size_Log2_Mask = (1 << 6) - 1
    896,   // GV_SimpleBufferSize
    1024,  // GV_Max_WG_Size
    256,   // GV_Default_WG_Size
    16,    // GV_Max_Warp_Number = 1024 / 64
    63     // GV_Warp_Size_Log2_MaskL
};
109 
/// Grid values for NVIDIA GPUs (nvptx targets).
/// Element order must match the GVIDX enumerators.
static constexpr unsigned NVPTXGpuGridValues[] = {
    992,  // GV_Threads
    256,  // GV_Slot_Size
    32,   // GV_Warp_Size
    32,   // GV_Warp_Size_32 (same as GV_Warp_Size on NVPTX)
    5,    // GV_Warp_Size_Log2 (log2 of the 32-lane warp)
    8192, // GV_Warp_Slot_Size = 32 * 256
    1024, // GV_Max_Teams
    256,  // GV_Mem_Align
    31,   // GV_Warp_Size_Log2_Mask = (~0u >> (32 - 5))
    896,  // GV_SimpleBufferSize
    1024, // GV_Max_WG_Size
    128,  // GV_Default_WG_Size
    32,   // GV_Max_Warp_Number = 1024 / 32
    31    // GV_Warp_Size_Log2_MaskL
};
127 
128 } // namespace omp
129 } // namespace llvm
130 
131 #endif // LLVM_OPENMP_GRIDVALUES_H
132