1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Win32 specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/SmallString.h"
14#include "llvm/ADT/Twine.h"
15
16#include "llvm/Support/Windows/WindowsSupport.h"
17#include <process.h>
18
19#include <bitset>
20
21// Windows will at times define MemoryFence.
22#ifdef MemoryFence
23#undef MemoryFence
24#endif
25
26static unsigned __stdcall threadFuncSync(void *Arg) {
27  SyncThreadInfo *TI = static_cast<SyncThreadInfo *>(Arg);
28  TI->UserFn(TI->UserData);
29  return 0;
30}
31
32static unsigned __stdcall threadFuncAsync(void *Arg) {
33  std::unique_ptr<AsyncThreadInfo> Info(static_cast<AsyncThreadInfo *>(Arg));
34  (*Info)();
35  return 0;
36}
37
38static void
39llvm_execute_on_thread_impl(unsigned (__stdcall *ThreadFunc)(void *), void *Arg,
40                            llvm::Optional<unsigned> StackSizeInBytes,
41                            JoiningPolicy JP) {
42  HANDLE hThread = (HANDLE)::_beginthreadex(
43      NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL);
44
45  if (!hThread) {
46    ReportLastErrorFatal("_beginthreadex failed");
47  }
48
49  if (JP == JoiningPolicy::Join) {
50    if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
51      ReportLastErrorFatal("WaitForSingleObject failed");
52    }
53  }
54  if (::CloseHandle(hThread) == FALSE) {
55    ReportLastErrorFatal("CloseHandle failed");
56  }
57}
58
59uint64_t llvm::get_threadid() {
60  return uint64_t(::GetCurrentThreadId());
61}
62
63uint32_t llvm::get_max_thread_name_length() { return 0; }
64
65#if defined(_MSC_VER)
66static void SetThreadName(DWORD Id, LPCSTR Name) {
67  constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;
68
69#pragma pack(push, 8)
70  struct THREADNAME_INFO {
71    DWORD dwType;     // Must be 0x1000.
72    LPCSTR szName;    // Pointer to thread name
73    DWORD dwThreadId; // Thread ID (-1 == current thread)
74    DWORD dwFlags;    // Reserved.  Do not use.
75  };
76#pragma pack(pop)
77
78  THREADNAME_INFO info;
79  info.dwType = 0x1000;
80  info.szName = Name;
81  info.dwThreadId = Id;
82  info.dwFlags = 0;
83
84  __try {
85    ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR),
86      (ULONG_PTR *)&info);
87  }
88  __except (EXCEPTION_EXECUTE_HANDLER) {
89  }
90}
91#endif
92
93void llvm::set_thread_name(const Twine &Name) {
94#if defined(_MSC_VER)
95  // Make sure the input is null terminated.
96  SmallString<64> Storage;
97  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
98  SetThreadName(::GetCurrentThreadId(), NameStr.data());
99#endif
100}
101
102void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
103  // "Name" is not an inherent property of a thread on Windows.  In fact, when
104  // you "set" the name, you are only firing a one-time message to a debugger
105  // which it interprets as a program setting its threads' name.  We may be
106  // able to get fancy by creating a TLS entry when someone calls
107  // set_thread_name so that subsequent calls to get_thread_name return this
108  // value.
109  Name.clear();
110}
111
112SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
113  // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
114  // Begin background processing mode. The system lowers the resource scheduling
115  // priorities of the thread so that it can perform background work without
116  // significantly affecting activity in the foreground.
117  // End background processing mode. The system restores the resource scheduling
118  // priorities of the thread as they were before the thread entered background
119  // processing mode.
120  return SetThreadPriority(GetCurrentThread(),
121                           Priority == ThreadPriority::Background
122                               ? THREAD_MODE_BACKGROUND_BEGIN
123                               : THREAD_MODE_BACKGROUND_END)
124             ? SetThreadPriorityResult::SUCCESS
125             : SetThreadPriorityResult::FAILURE;
126}
127
// Description of one Windows processor group as used by this file.
// All members have defaults so that a default-constructed group is safe to
// query; in particular ThreadsPerCore defaults to 1 so that it can be used as
// a divisor before any core information has been recorded.
struct ProcessorGroup {
  unsigned ID = 0;             // Index of the group.
  unsigned AllThreads = 0;     // Maximum number of logical processors.
  unsigned UsableThreads = 0;  // Logical processors usable by this process.
  unsigned ThreadsPerCore = 1; // Logical processors per physical core.
  uint64_t Affinity = 0;       // Bit mask of the usable logical processors.

  // Number of physical cores usable in this group; never less than 1. A zero
  // ThreadsPerCore is treated as 1 to avoid dividing by zero.
  unsigned useableCores() const {
    const unsigned PerCore = std::max(1U, ThreadsPerCore);
    return std::max(1U, UsableThreads / PerCore);
  }
};
139
140template <typename F>
141static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
142  DWORD Len = 0;
143  BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
144  if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
145    return false;
146  }
147  auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
148  R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
149  if (R) {
150    auto *End =
151        (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
152    for (auto *Curr = Info; Curr < End;
153         Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
154                                                            Curr->Size)) {
155      if (Curr->Relationship != Relationship)
156        continue;
157      Fn(Curr);
158    }
159  }
160  free(Info);
161  return true;
162}
163
164static ArrayRef<ProcessorGroup> getProcessorGroups() {
165  auto computeGroups = []() {
166    SmallVector<ProcessorGroup, 4> Groups;
167
168    auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
169      GROUP_RELATIONSHIP &El = ProcInfo->Group;
170      for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
171        ProcessorGroup G;
172        G.ID = Groups.size();
173        G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
174        G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
175        assert(G.UsableThreads <= 64);
176        G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
177        Groups.push_back(G);
178      }
179    };
180
181    if (!IterateProcInfo(RelationGroup, HandleGroup))
182      return std::vector<ProcessorGroup>();
183
184    auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
185      PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
186      assert(El.GroupCount == 1);
187      unsigned NumHyperThreads = 1;
188      // If the flag is set, each core supports more than one hyper-thread.
189      if (El.Flags & LTP_PC_SMT)
190        NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
191      unsigned I = El.GroupMask[0].Group;
192      Groups[I].ThreadsPerCore = NumHyperThreads;
193    };
194
195    if (!IterateProcInfo(RelationProcessorCore, HandleProc))
196      return std::vector<ProcessorGroup>();
197
198    // If there's an affinity mask set, assume the user wants to constrain the
199    // current process to only a single CPU group. On Windows, it is not
200    // possible for affinity masks to cross CPU group boundaries.
201    DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
202    if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
203                                 &SystemAffinityMask) &&
204        ProcessAffinityMask != SystemAffinityMask) {
205      // We don't expect more that 4 CPU groups on Windows (256 processors).
206      USHORT GroupCount = 4;
207      USHORT GroupArray[4]{};
208      if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
209                                    GroupArray)) {
210        assert(GroupCount == 1 &&
211               "On startup, a program is expected to be assigned only to "
212               "one processor group!");
213        unsigned CurrentGroupID = GroupArray[0];
214        ProcessorGroup NewG{Groups[CurrentGroupID]};
215        NewG.Affinity = ProcessAffinityMask;
216        NewG.UsableThreads = countPopulation(ProcessAffinityMask);
217        Groups.clear();
218        Groups.push_back(NewG);
219      }
220    }
221
222    return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
223  };
224  static auto Groups = computeGroups();
225  return ArrayRef<ProcessorGroup>(Groups);
226}
227
// Sums P(Element) over every element of Range into an unsigned total, which
// starts at zero.
template <typename R, typename UnaryPredicate>
static unsigned aggregate(R &&Range, UnaryPredicate P) {
  unsigned Total = 0;
  for (const auto &Element : Range) {
    Total += P(Element);
  }
  return Total;
}
235
236// for sys::getHostNumPhysicalCores
237int computeHostNumPhysicalCores() {
238  static unsigned Cores =
239      aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
240        return G.UsableThreads / G.ThreadsPerCore;
241      });
242  return Cores;
243}
244
245int computeHostNumHardwareThreads() {
246  static unsigned Threads =
247      aggregate(getProcessorGroups(),
248                [](const ProcessorGroup &G) { return G.UsableThreads; });
249  return Threads;
250}
251
252// Finds the proper CPU socket where a thread number should go. Returns 'None'
253// if the thread shall remain on the actual CPU socket.
254Optional<unsigned>
255llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
256  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
257  // Only one CPU socket in the system or process affinity was set, no need to
258  // move the thread(s) to another CPU socket.
259  if (Groups.size() <= 1)
260    return None;
261
262  // We ask for less threads than there are hardware threads per CPU socket, no
263  // need to dispatch threads to other CPU sockets.
264  unsigned MaxThreadsPerSocket =
265      UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
266  if (compute_thread_count() <= MaxThreadsPerSocket)
267    return None;
268
269  assert(ThreadPoolNum < compute_thread_count() &&
270         "The thread index is not within thread strategy's range!");
271
272  // Assumes the same number of hardware threads per CPU socket.
273  return (ThreadPoolNum * Groups.size()) / compute_thread_count();
274}
275
276// Assign the current thread to a more appropriate CPU socket or CPU group
277void llvm::ThreadPoolStrategy::apply_thread_strategy(
278    unsigned ThreadPoolNum) const {
279  Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
280  if (!Socket)
281    return;
282  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
283  GROUP_AFFINITY Affinity{};
284  Affinity.Group = Groups[*Socket].ID;
285  Affinity.Mask = Groups[*Socket].Affinity;
286  SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
287}
288
289llvm::BitVector llvm::get_thread_affinity_mask() {
290  GROUP_AFFINITY Affinity{};
291  GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
292
293  static unsigned All =
294      aggregate(getProcessorGroups(),
295                [](const ProcessorGroup &G) { return G.AllThreads; });
296
297  unsigned StartOffset =
298      aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
299        return G.ID < Affinity.Group ? G.AllThreads : 0;
300      });
301
302  llvm::BitVector V;
303  V.resize(All);
304  for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
305    if ((Affinity.Mask >> I) & 1)
306      V.set(StartOffset + I);
307  }
308  return V;
309}
310
311unsigned llvm::get_cpus() { return getProcessorGroups().size(); }
312