1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Win32 specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/SmallString.h"
14#include "llvm/ADT/Twine.h"
15
16#include "llvm/Support/Windows/WindowsSupport.h"
17#include <process.h>
18
19#include <bitset>
20
21// Windows will at times define MemoryFence.
22#ifdef MemoryFence
23#undef MemoryFence
24#endif
25
26namespace llvm {
27HANDLE
28llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
29                            std::optional<unsigned> StackSizeInBytes) {
30  HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
31                                            ThreadFunc, Arg, 0, NULL);
32
33  if (!hThread) {
34    ReportLastErrorFatal("_beginthreadex failed");
35  }
36
37  return hThread;
38}
39
40void llvm_thread_join_impl(HANDLE hThread) {
41  if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
42    ReportLastErrorFatal("WaitForSingleObject failed");
43  }
44}
45
46void llvm_thread_detach_impl(HANDLE hThread) {
47  if (::CloseHandle(hThread) == FALSE) {
48    ReportLastErrorFatal("CloseHandle failed");
49  }
50}
51
52DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); }
53
54DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); }
55
56} // namespace llvm
57
58uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); }
59
60uint32_t llvm::get_max_thread_name_length() { return 0; }
61
62#if defined(_MSC_VER)
63static void SetThreadName(DWORD Id, LPCSTR Name) {
64  constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;
65
66#pragma pack(push, 8)
67  struct THREADNAME_INFO {
68    DWORD dwType;     // Must be 0x1000.
69    LPCSTR szName;    // Pointer to thread name
70    DWORD dwThreadId; // Thread ID (-1 == current thread)
71    DWORD dwFlags;    // Reserved.  Do not use.
72  };
73#pragma pack(pop)
74
75  THREADNAME_INFO info;
76  info.dwType = 0x1000;
77  info.szName = Name;
78  info.dwThreadId = Id;
79  info.dwFlags = 0;
80
81  __try {
82    ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR),
83                     (ULONG_PTR *)&info);
84  } __except (EXCEPTION_EXECUTE_HANDLER) {
85  }
86}
87#endif
88
89void llvm::set_thread_name(const Twine &Name) {
90#if defined(_MSC_VER)
91  // Make sure the input is null terminated.
92  SmallString<64> Storage;
93  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
94  SetThreadName(::GetCurrentThreadId(), NameStr.data());
95#endif
96}
97
/// Retrieves the name of the current thread into \p Name. This implementation
/// does not track thread names, so \p Name is always left empty.
void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  // "Name" is not an inherent property of a thread on Windows.  In fact, when
  // you "set" the name, you are only firing a one-time message to a debugger
  // which it interprets as a program setting its threads' name.  We may be
  // able to get fancy by creating a TLS entry when someone calls
  // set_thread_name so that subsequent calls to get_thread_name return this
  // value.
  Name.clear();
}
107
108SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
109  // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
110  // Begin background processing mode. The system lowers the resource scheduling
111  // priorities of the thread so that it can perform background work without
112  // significantly affecting activity in the foreground.
113  // End background processing mode. The system restores the resource scheduling
114  // priorities of the thread as they were before the thread entered background
115  // processing mode.
116  //
117  // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
118  return SetThreadPriority(GetCurrentThread(),
119                           Priority != ThreadPriority::Default
120                               ? THREAD_MODE_BACKGROUND_BEGIN
121                               : THREAD_MODE_BACKGROUND_END)
122             ? SetThreadPriorityResult::SUCCESS
123             : SetThreadPriorityResult::FAILURE;
124}
125
/// Describes one processor group as seen by the current process.
///
/// Every field has a default so that a freshly constructed group is always
/// safe to query; in particular ThreadsPerCore defaults to 1 because it is
/// used as a divisor.
struct ProcessorGroup {
  /// Index assigned to the group when it was discovered.
  unsigned ID = 0;
  /// Maximum number of hardware threads the group can hold.
  unsigned AllThreads = 0;
  /// Number of hardware threads this process may actually use in the group.
  unsigned UsableThreads = 0;
  /// Number of hardware threads that share one physical core.
  unsigned ThreadsPerCore = 1;
  /// Bit mask of the usable hardware threads within the group.
  uint64_t Affinity = 0;

  /// Returns the number of usable physical cores, never less than 1.
  unsigned useableCores() const {
    // Clamp the divisor so a zero ThreadsPerCore cannot trigger a division
    // by zero.
    const unsigned PerCore = std::max(1U, ThreadsPerCore);
    return std::max(1U, UsableThreads / PerCore);
  }
};
137
138template <typename F>
139static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
140  DWORD Len = 0;
141  BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
142  if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
143    return false;
144  }
145  auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
146  R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
147  if (R) {
148    auto *End =
149        (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
150    for (auto *Curr = Info; Curr < End;
151         Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
152                                                            Curr->Size)) {
153      if (Curr->Relationship != Relationship)
154        continue;
155      Fn(Curr);
156    }
157  }
158  free(Info);
159  return true;
160}
161
162static std::optional<std::vector<USHORT>> getActiveGroups() {
163  USHORT Count = 0;
164  if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))
165    return std::nullopt;
166
167  if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
168    return std::nullopt;
169
170  std::vector<USHORT> Groups;
171  Groups.resize(Count);
172  if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))
173    return std::nullopt;
174
175  return Groups;
176}
177
178static ArrayRef<ProcessorGroup> getProcessorGroups() {
179  auto computeGroups = []() {
180    SmallVector<ProcessorGroup, 4> Groups;
181
182    auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
183      GROUP_RELATIONSHIP &El = ProcInfo->Group;
184      for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
185        ProcessorGroup G;
186        G.ID = Groups.size();
187        G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
188        G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
189        assert(G.UsableThreads <= 64);
190        G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
191        Groups.push_back(G);
192      }
193    };
194
195    if (!IterateProcInfo(RelationGroup, HandleGroup))
196      return std::vector<ProcessorGroup>();
197
198    auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
199      PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
200      assert(El.GroupCount == 1);
201      unsigned NumHyperThreads = 1;
202      // If the flag is set, each core supports more than one hyper-thread.
203      if (El.Flags & LTP_PC_SMT)
204        NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
205      unsigned I = El.GroupMask[0].Group;
206      Groups[I].ThreadsPerCore = NumHyperThreads;
207    };
208
209    if (!IterateProcInfo(RelationProcessorCore, HandleProc))
210      return std::vector<ProcessorGroup>();
211
212    auto ActiveGroups = getActiveGroups();
213    if (!ActiveGroups)
214      return std::vector<ProcessorGroup>();
215
216    // If there's an affinity mask set, assume the user wants to constrain the
217    // current process to only a single CPU group. On Windows, it is not
218    // possible for affinity masks to cross CPU group boundaries.
219    DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
220    if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
221                                 &SystemAffinityMask)) {
222
223      if (ProcessAffinityMask != SystemAffinityMask) {
224        if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
225          // The process affinity mask is spurious, due to an OS bug, ignore it.
226          return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
227        }
228
229        assert(ActiveGroups->size() == 1 &&
230               "When an affinity mask is set, the process is expected to be "
231               "assigned to a single processor group!");
232
233        unsigned CurrentGroupID = (*ActiveGroups)[0];
234        ProcessorGroup NewG{Groups[CurrentGroupID]};
235        NewG.Affinity = ProcessAffinityMask;
236        NewG.UsableThreads = llvm::popcount(ProcessAffinityMask);
237        Groups.clear();
238        Groups.push_back(NewG);
239      }
240    }
241    return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
242  };
243  static auto Groups = computeGroups();
244  return ArrayRef<ProcessorGroup>(Groups);
245}
246
/// Sums the values \p P yields for each element of \p Range.
///
/// \param Range any range usable in a range-based for loop.
/// \param P callable invoked once per element; its result is added to the
///        running unsigned total.
/// \returns the accumulated total, 0 for an empty range.
template <typename R, typename UnaryPredicate>
static unsigned aggregate(R &&Range, UnaryPredicate P) {
  unsigned Total = 0;
  for (const auto &Element : Range) {
    Total += P(Element);
  }
  return Total;
}
254
255int llvm::get_physical_cores() {
256  static unsigned Cores =
257      aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
258        return G.UsableThreads / G.ThreadsPerCore;
259      });
260  return Cores;
261}
262
263static int computeHostNumHardwareThreads() {
264  static unsigned Threads =
265      aggregate(getProcessorGroups(),
266                [](const ProcessorGroup &G) { return G.UsableThreads; });
267  return Threads;
268}
269
270// Finds the proper CPU socket where a thread number should go. Returns
271// 'std::nullopt' if the thread shall remain on the actual CPU socket.
272std::optional<unsigned>
273llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
274  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
275  // Only one CPU socket in the system or process affinity was set, no need to
276  // move the thread(s) to another CPU socket.
277  if (Groups.size() <= 1)
278    return std::nullopt;
279
280  // We ask for less threads than there are hardware threads per CPU socket, no
281  // need to dispatch threads to other CPU sockets.
282  unsigned MaxThreadsPerSocket =
283      UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
284  if (compute_thread_count() <= MaxThreadsPerSocket)
285    return std::nullopt;
286
287  assert(ThreadPoolNum < compute_thread_count() &&
288         "The thread index is not within thread strategy's range!");
289
290  // Assumes the same number of hardware threads per CPU socket.
291  return (ThreadPoolNum * Groups.size()) / compute_thread_count();
292}
293
294// Assign the current thread to a more appropriate CPU socket or CPU group
295void llvm::ThreadPoolStrategy::apply_thread_strategy(
296    unsigned ThreadPoolNum) const {
297
298  // After Windows 11 and Windows Server 2022, let the OS do the scheduling,
299  // since a process automatically gains access to all processor groups.
300  if (llvm::RunningWindows11OrGreater())
301    return;
302
303  std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
304  if (!Socket)
305    return;
306  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
307  GROUP_AFFINITY Affinity{};
308  Affinity.Group = Groups[*Socket].ID;
309  Affinity.Mask = Groups[*Socket].Affinity;
310  SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
311}
312
313llvm::BitVector llvm::get_thread_affinity_mask() {
314  GROUP_AFFINITY Affinity{};
315  GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
316
317  static unsigned All =
318      aggregate(getProcessorGroups(),
319                [](const ProcessorGroup &G) { return G.AllThreads; });
320
321  unsigned StartOffset =
322      aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
323        return G.ID < Affinity.Group ? G.AllThreads : 0;
324      });
325
326  llvm::BitVector V;
327  V.resize(All);
328  for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
329    if ((Affinity.Mask >> I) & 1)
330      V.set(StartOffset + I);
331  }
332  return V;
333}
334
335unsigned llvm::get_cpus() { return getProcessorGroups().size(); }
336