1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Win32 specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/SmallString.h"
14#include "llvm/ADT/Twine.h"
15
16#include "llvm/Support/Windows/WindowsSupport.h"
17#include <process.h>
18
19#include <bitset>
20
21// Windows will at times define MemoryFence.
22#ifdef MemoryFence
23#undef MemoryFence
24#endif
25
26namespace llvm {
27HANDLE
28llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
29                            llvm::Optional<unsigned> StackSizeInBytes) {
30  HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
31                                            ThreadFunc, Arg, 0, NULL);
32
33  if (!hThread) {
34    ReportLastErrorFatal("_beginthreadex failed");
35  }
36
37  return hThread;
38}
39
40void llvm_thread_join_impl(HANDLE hThread) {
41  if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
42    ReportLastErrorFatal("WaitForSingleObject failed");
43  }
44}
45
46void llvm_thread_detach_impl(HANDLE hThread) {
47  if (::CloseHandle(hThread) == FALSE) {
48    ReportLastErrorFatal("CloseHandle failed");
49  }
50}
51
52DWORD llvm_thread_get_id_impl(HANDLE hThread) {
53  return ::GetThreadId(hThread);
54}
55
56DWORD llvm_thread_get_current_id_impl() {
57  return ::GetCurrentThreadId();
58}
59
60} // namespace llvm
61
62uint64_t llvm::get_threadid() {
63  return uint64_t(::GetCurrentThreadId());
64}
65
66uint32_t llvm::get_max_thread_name_length() { return 0; }
67
#if defined(_MSC_VER)
/// Announces \p Name as the name of thread \p Id by raising the
/// MS_VC_EXCEPTION (0x406D1388) structured exception with a THREADNAME_INFO
/// record as its arguments. The exception is caught and discarded by the
/// handler in this function.
static void SetThreadName(DWORD Id, LPCSTR Name) {
  constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;

#pragma pack(push, 8)
  struct THREADNAME_INFO {
    DWORD dwType;     // Must be 0x1000.
    LPCSTR szName;    // Pointer to thread name
    DWORD dwThreadId; // Thread ID (-1 == current thread)
    DWORD dwFlags;    // Reserved.  Do not use.
  };
#pragma pack(pop)

  // Fields in declaration order: type tag, name, thread id, reserved flags.
  THREADNAME_INFO Payload = {0x1000, Name, Id, 0};

  // The record is handed over as an array of ULONG_PTR-sized arguments.
  const DWORD NumArgs = sizeof(Payload) / sizeof(ULONG_PTR);

  __try {
    ::RaiseException(MS_VC_EXCEPTION, 0, NumArgs, (ULONG_PTR *)&Payload);
  } __except (EXCEPTION_EXECUTE_HANDLER) {
    // Nothing to do: the exception exists only to deliver the record.
  }
}
#endif
95
96void llvm::set_thread_name(const Twine &Name) {
97#if defined(_MSC_VER)
98  // Make sure the input is null terminated.
99  SmallString<64> Storage;
100  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
101  SetThreadName(::GetCurrentThreadId(), NameStr.data());
102#endif
103}
104
105void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
106  // "Name" is not an inherent property of a thread on Windows.  In fact, when
107  // you "set" the name, you are only firing a one-time message to a debugger
108  // which it interprets as a program setting its threads' name.  We may be
109  // able to get fancy by creating a TLS entry when someone calls
110  // set_thread_name so that subsequent calls to get_thread_name return this
111  // value.
112  Name.clear();
113}
114
115SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
116  // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
117  // Begin background processing mode. The system lowers the resource scheduling
118  // priorities of the thread so that it can perform background work without
119  // significantly affecting activity in the foreground.
120  // End background processing mode. The system restores the resource scheduling
121  // priorities of the thread as they were before the thread entered background
122  // processing mode.
123  //
124  // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
125  return SetThreadPriority(GetCurrentThread(),
126                           Priority != ThreadPriority::Default
127                               ? THREAD_MODE_BACKGROUND_BEGIN
128                               : THREAD_MODE_BACKGROUND_END)
129             ? SetThreadPriorityResult::SUCCESS
130             : SetThreadPriorityResult::FAILURE;
131}
132
/// Describes one processor group as seen by the current process.
///
/// Every member has a default value so that a group whose fields are only
/// partially filled in (for example, one for which no per-core record was
/// seen) never exposes indeterminate values. In particular ThreadsPerCore
/// defaults to 1 so that it is always a valid divisor.
struct ProcessorGroup {
  /// Index of the group.
  unsigned ID = 0;
  /// Maximum number of hardware threads the group can hold.
  unsigned AllThreads = 0;
  /// Number of hardware threads currently usable in the group.
  unsigned UsableThreads = 0;
  /// Number of hardware threads sharing one core (1 when there is no SMT).
  unsigned ThreadsPerCore = 1;
  /// Bit mask of the usable hardware threads in the group.
  uint64_t Affinity = 0;

  /// Returns the number of usable cores in the group, never less than 1.
  /// A ThreadsPerCore of 0 is treated as 1 to avoid dividing by zero.
  unsigned useableCores() const {
    const unsigned Divisor = std::max(1U, ThreadsPerCore);
    return std::max(1U, UsableThreads / Divisor);
  }
};
144
145template <typename F>
146static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
147  DWORD Len = 0;
148  BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
149  if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
150    return false;
151  }
152  auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
153  R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
154  if (R) {
155    auto *End =
156        (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
157    for (auto *Curr = Info; Curr < End;
158         Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
159                                                            Curr->Size)) {
160      if (Curr->Relationship != Relationship)
161        continue;
162      Fn(Curr);
163    }
164  }
165  free(Info);
166  return true;
167}
168
169static ArrayRef<ProcessorGroup> getProcessorGroups() {
170  auto computeGroups = []() {
171    SmallVector<ProcessorGroup, 4> Groups;
172
173    auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
174      GROUP_RELATIONSHIP &El = ProcInfo->Group;
175      for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
176        ProcessorGroup G;
177        G.ID = Groups.size();
178        G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
179        G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
180        assert(G.UsableThreads <= 64);
181        G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
182        Groups.push_back(G);
183      }
184    };
185
186    if (!IterateProcInfo(RelationGroup, HandleGroup))
187      return std::vector<ProcessorGroup>();
188
189    auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
190      PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
191      assert(El.GroupCount == 1);
192      unsigned NumHyperThreads = 1;
193      // If the flag is set, each core supports more than one hyper-thread.
194      if (El.Flags & LTP_PC_SMT)
195        NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
196      unsigned I = El.GroupMask[0].Group;
197      Groups[I].ThreadsPerCore = NumHyperThreads;
198    };
199
200    if (!IterateProcInfo(RelationProcessorCore, HandleProc))
201      return std::vector<ProcessorGroup>();
202
203    // If there's an affinity mask set, assume the user wants to constrain the
204    // current process to only a single CPU group. On Windows, it is not
205    // possible for affinity masks to cross CPU group boundaries.
206    DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
207    if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
208                                 &SystemAffinityMask) &&
209        ProcessAffinityMask != SystemAffinityMask) {
210      // We don't expect more that 4 CPU groups on Windows (256 processors).
211      USHORT GroupCount = 4;
212      USHORT GroupArray[4]{};
213      if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
214                                    GroupArray)) {
215        assert(GroupCount == 1 &&
216               "On startup, a program is expected to be assigned only to "
217               "one processor group!");
218        unsigned CurrentGroupID = GroupArray[0];
219        ProcessorGroup NewG{Groups[CurrentGroupID]};
220        NewG.Affinity = ProcessAffinityMask;
221        NewG.UsableThreads = countPopulation(ProcessAffinityMask);
222        Groups.clear();
223        Groups.push_back(NewG);
224      }
225    }
226
227    return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
228  };
229  static auto Groups = computeGroups();
230  return ArrayRef<ProcessorGroup>(Groups);
231}
232
/// Applies \p P to every element of \p Range and returns the sum of the
/// results, accumulated in an unsigned starting from zero.
template <typename R, typename UnaryPredicate>
static unsigned aggregate(R &&Range, UnaryPredicate P) {
  unsigned Total = 0;
  for (const auto &Elem : Range) {
    Total += P(Elem);
  }
  return Total;
}
240
241// for sys::getHostNumPhysicalCores
242int computeHostNumPhysicalCores() {
243  static unsigned Cores =
244      aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
245        return G.UsableThreads / G.ThreadsPerCore;
246      });
247  return Cores;
248}
249
250int computeHostNumHardwareThreads() {
251  static unsigned Threads =
252      aggregate(getProcessorGroups(),
253                [](const ProcessorGroup &G) { return G.UsableThreads; });
254  return Threads;
255}
256
257// Finds the proper CPU socket where a thread number should go. Returns 'None'
258// if the thread shall remain on the actual CPU socket.
259Optional<unsigned>
260llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
261  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
262  // Only one CPU socket in the system or process affinity was set, no need to
263  // move the thread(s) to another CPU socket.
264  if (Groups.size() <= 1)
265    return None;
266
267  // We ask for less threads than there are hardware threads per CPU socket, no
268  // need to dispatch threads to other CPU sockets.
269  unsigned MaxThreadsPerSocket =
270      UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
271  if (compute_thread_count() <= MaxThreadsPerSocket)
272    return None;
273
274  assert(ThreadPoolNum < compute_thread_count() &&
275         "The thread index is not within thread strategy's range!");
276
277  // Assumes the same number of hardware threads per CPU socket.
278  return (ThreadPoolNum * Groups.size()) / compute_thread_count();
279}
280
281// Assign the current thread to a more appropriate CPU socket or CPU group
282void llvm::ThreadPoolStrategy::apply_thread_strategy(
283    unsigned ThreadPoolNum) const {
284  Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
285  if (!Socket)
286    return;
287  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
288  GROUP_AFFINITY Affinity{};
289  Affinity.Group = Groups[*Socket].ID;
290  Affinity.Mask = Groups[*Socket].Affinity;
291  SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
292}
293
294llvm::BitVector llvm::get_thread_affinity_mask() {
295  GROUP_AFFINITY Affinity{};
296  GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
297
298  static unsigned All =
299      aggregate(getProcessorGroups(),
300                [](const ProcessorGroup &G) { return G.AllThreads; });
301
302  unsigned StartOffset =
303      aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
304        return G.ID < Affinity.Group ? G.AllThreads : 0;
305      });
306
307  llvm::BitVector V;
308  V.resize(All);
309  for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
310    if ((Affinity.Mask >> I) & 1)
311      V.set(StartOffset + I);
312  }
313  return V;
314}
315
316unsigned llvm::get_cpus() { return getProcessorGroups().size(); }
317