1 //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file declares helper functions for running LLVM in a multi-threaded 10 // environment. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_SUPPORT_THREADING_H 15 #define LLVM_SUPPORT_THREADING_H 16 17 #include "llvm/ADT/BitVector.h" 18 #include "llvm/ADT/FunctionExtras.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX 22 #include "llvm/Support/Compiler.h" 23 #include <ciso646> // So we can check the C++ standard lib macros. 24 #include <functional> 25 26 #if defined(_MSC_VER) 27 // MSVC's call_once implementation worked since VS 2015, which is the minimum 28 // supported version as of this writing. 29 #define LLVM_THREADING_USE_STD_CALL_ONCE 1 30 #elif defined(LLVM_ON_UNIX) && \ 31 (defined(_LIBCPP_VERSION) || \ 32 !(defined(__NetBSD__) || defined(__OpenBSD__) || \ 33 (defined(__ppc__) || defined(__PPC__)))) 34 // std::call_once from libc++ is used on all Unix platforms. Other 35 // implementations like libstdc++ are known to have problems on NetBSD, 36 // OpenBSD and PowerPC. 37 #define LLVM_THREADING_USE_STD_CALL_ONCE 1 38 #elif defined(LLVM_ON_UNIX) && \ 39 ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__)) 40 #define LLVM_THREADING_USE_STD_CALL_ONCE 1 41 #else 42 #define LLVM_THREADING_USE_STD_CALL_ONCE 0 43 #endif 44 45 #if LLVM_THREADING_USE_STD_CALL_ONCE 46 #include <mutex> 47 #else 48 #include "llvm/Support/Atomic.h" 49 #endif 50 51 namespace llvm { 52 class Twine; 53 54 /// Returns true if LLVM is compiled with support for multi-threading, and 55 /// false otherwise. 56 bool llvm_is_multithreaded(); 57 58 #if LLVM_THREADING_USE_STD_CALL_ONCE 59 60 typedef std::once_flag once_flag; 61 62 #else 63 64 enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 }; 65 66 /// The llvm::once_flag structure 67 /// 68 /// This type is modeled after std::once_flag to use with llvm::call_once. 69 /// This structure must be used as an opaque object. It is a struct to force 70 /// autoinitialization and behave like std::once_flag. 71 struct once_flag { 72 volatile sys::cas_flag status = Uninitialized; 73 }; 74 75 #endif 76 77 /// Execute the function specified as a parameter once. 78 /// 79 /// Typical usage: 80 /// \code 81 /// void foo() {...}; 82 /// ... 83 /// static once_flag flag; 84 /// call_once(flag, foo); 85 /// \endcode 86 /// 87 /// \param flag Flag used for tracking whether or not this has run. 88 /// \param F Function to call once. 89 template <typename Function, typename... Args> call_once(once_flag & flag,Function && F,Args &&...ArgList)90 void call_once(once_flag &flag, Function &&F, Args &&... ArgList) { 91 #if LLVM_THREADING_USE_STD_CALL_ONCE 92 std::call_once(flag, std::forward<Function>(F), 93 std::forward<Args>(ArgList)...); 94 #else 95 // For other platforms we use a generic (if brittle) version based on our 96 // atomics. 97 sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized); 98 if (old_val == Uninitialized) { 99 std::forward<Function>(F)(std::forward<Args>(ArgList)...); 100 sys::MemoryFence(); 101 TsanIgnoreWritesBegin(); 102 TsanHappensBefore(&flag.status); 103 flag.status = Done; 104 TsanIgnoreWritesEnd(); 105 } else { 106 // Wait until any thread doing the call has finished. 107 sys::cas_flag tmp = flag.status; 108 sys::MemoryFence(); 109 while (tmp != Done) { 110 tmp = flag.status; 111 sys::MemoryFence(); 112 } 113 } 114 TsanHappensAfter(&flag.status); 115 #endif 116 } 117 118 /// This tells how a thread pool will be used 119 class ThreadPoolStrategy { 120 public: 121 // The default value (0) means all available threads should be used, 122 // taking the affinity mask into account. If set, this value only represents 123 // a suggested high bound, the runtime might choose a lower value (not 124 // higher). 125 unsigned ThreadsRequested = 0; 126 127 // If SMT is active, use hyper threads. If false, there will be only one 128 // std::thread per core. 129 bool UseHyperThreads = true; 130 131 // If set, will constrain 'ThreadsRequested' to the number of hardware 132 // threads, or hardware cores. 133 bool Limit = false; 134 135 /// Retrieves the max available threads for the current strategy. This 136 /// accounts for affinity masks and takes advantage of all CPU sockets. 137 unsigned compute_thread_count() const; 138 139 /// Assign the current thread to an ideal hardware CPU or NUMA node. In a 140 /// multi-socket system, this ensures threads are assigned to all CPU 141 /// sockets. \p ThreadPoolNum represents a number bounded by [0, 142 /// compute_thread_count()). 143 void apply_thread_strategy(unsigned ThreadPoolNum) const; 144 145 /// Finds the CPU socket where a thread should go. Returns 'None' if the 146 /// thread shall remain on the actual CPU socket. 147 Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const; 148 }; 149 150 /// Build a strategy from a number of threads as a string provided in \p Num. 151 /// When Num is above the max number of threads specified by the \p Default 152 /// strategy, we attempt to equally allocate the threads on all CPU sockets. 153 /// "0" or an empty string will return the \p Default strategy. 154 /// "all" for using all hardware threads. 155 Optional<ThreadPoolStrategy> 156 get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {}); 157 158 /// Returns a thread strategy for tasks requiring significant memory or other 159 /// resources. To be used for workloads where hardware_concurrency() proves to 160 /// be less efficient. Avoid this strategy if doing lots of I/O. Currently 161 /// based on physical cores, if available for the host system, otherwise falls 162 /// back to hardware_concurrency(). Returns 1 when LLVM is configured with 163 /// LLVM_ENABLE_THREADS = OFF. 164 inline ThreadPoolStrategy 165 heavyweight_hardware_concurrency(unsigned ThreadCount = 0) { 166 ThreadPoolStrategy S; 167 S.UseHyperThreads = false; 168 S.ThreadsRequested = ThreadCount; 169 return S; 170 } 171 172 /// Like heavyweight_hardware_concurrency() above, but builds a strategy 173 /// based on the rules described for get_threadpool_strategy(). 174 /// If \p Num is invalid, returns a default strategy where one thread per 175 /// hardware core is used. heavyweight_hardware_concurrency(StringRef Num)176 inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) { 177 Optional<ThreadPoolStrategy> S = 178 get_threadpool_strategy(Num, heavyweight_hardware_concurrency()); 179 if (S) 180 return *S; 181 return heavyweight_hardware_concurrency(); 182 } 183 184 /// Returns a default thread strategy where all available hardware resources 185 /// are to be used, except for those initially excluded by an affinity mask. 186 /// This function takes affinity into consideration. Returns 1 when LLVM is 187 /// configured with LLVM_ENABLE_THREADS=OFF. 188 inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) { 189 ThreadPoolStrategy S; 190 S.ThreadsRequested = ThreadCount; 191 return S; 192 } 193 194 /// Returns an optimal thread strategy to execute specified amount of tasks. 195 /// This strategy should prevent us from creating too many threads if we 196 /// occasionaly have an unexpectedly small amount of tasks. 197 inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) { 198 ThreadPoolStrategy S; 199 S.Limit = true; 200 S.ThreadsRequested = TaskCount; 201 return S; 202 } 203 204 /// Return the current thread id, as used in various OS system calls. 205 /// Note that not all platforms guarantee that the value returned will be 206 /// unique across the entire system, so portable code should not assume 207 /// this. 208 uint64_t get_threadid(); 209 210 /// Get the maximum length of a thread name on this platform. 211 /// A value of 0 means there is no limit. 212 uint32_t get_max_thread_name_length(); 213 214 /// Set the name of the current thread. Setting a thread's name can 215 /// be helpful for enabling useful diagnostics under a debugger or when 216 /// logging. The level of support for setting a thread's name varies 217 /// wildly across operating systems, and we only make a best effort to 218 /// perform the operation on supported platforms. No indication of success 219 /// or failure is returned. 220 void set_thread_name(const Twine &Name); 221 222 /// Get the name of the current thread. The level of support for 223 /// getting a thread's name varies wildly across operating systems, and it 224 /// is not even guaranteed that if you can successfully set a thread's name 225 /// that you can later get it back. This function is intended for diagnostic 226 /// purposes, and as with setting a thread's name no indication of whether 227 /// the operation succeeded or failed is returned. 228 void get_thread_name(SmallVectorImpl<char> &Name); 229 230 /// Returns a mask that represents on which hardware thread, core, CPU, NUMA 231 /// group, the calling thread can be executed. On Windows, threads cannot 232 /// cross CPU sockets boundaries. 233 llvm::BitVector get_thread_affinity_mask(); 234 235 /// Returns how many physical CPUs or NUMA groups the system has. 236 unsigned get_cpus(); 237 238 enum class ThreadPriority { 239 Background = 0, 240 Default = 1, 241 }; 242 /// If priority is Background tries to lower current threads priority such 243 /// that it does not affect foreground tasks significantly. Can be used for 244 /// long-running, latency-insensitive tasks to make sure cpu is not hogged by 245 /// this task. 246 /// If the priority is default tries to restore current threads priority to 247 /// default scheduling priority. 248 enum class SetThreadPriorityResult { FAILURE, SUCCESS }; 249 SetThreadPriorityResult set_thread_priority(ThreadPriority Priority); 250 } 251 252 #endif 253