1 //===-- Perf.cpp ----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Perf.h"
10 
11 #include "Plugins/Process/POSIX/ProcessPOSIXLog.h"
12 #include "lldb/Host/linux/Support.h"
13 #include "llvm/Support/FormatVariadic.h"
14 #include "llvm/Support/MathExtras.h"
15 #include "llvm/Support/MemoryBuffer.h"
16 #include <linux/version.h>
17 #include <sys/ioctl.h>
18 #include <sys/mman.h>
19 #include <sys/syscall.h>
20 #include <unistd.h>
21 
22 using namespace lldb_private;
23 using namespace process_linux;
24 using namespace llvm;
25 
26 Expected<LinuxPerfZeroTscConversion>
LoadPerfTscConversionParameters()27 lldb_private::process_linux::LoadPerfTscConversionParameters() {
28 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
29   lldb::pid_t pid = getpid();
30   perf_event_attr attr;
31   memset(&attr, 0, sizeof(attr));
32   attr.size = sizeof(attr);
33   attr.type = PERF_TYPE_SOFTWARE;
34   attr.config = PERF_COUNT_SW_DUMMY;
35 
36   Expected<PerfEvent> perf_event = PerfEvent::Init(attr, pid);
37   if (!perf_event)
38     return perf_event.takeError();
39   if (Error mmap_err =
40           perf_event->MmapMetadataAndBuffers(/*num_data_pages=*/0,
41                                              /*num_aux_pages=*/0,
42                                              /*data_buffer_write=*/false))
43     return std::move(mmap_err);
44 
45   perf_event_mmap_page &mmap_metada = perf_event->GetMetadataPage();
46   if (mmap_metada.cap_user_time && mmap_metada.cap_user_time_zero) {
47     return LinuxPerfZeroTscConversion{
48         mmap_metada.time_mult, mmap_metada.time_shift, {mmap_metada.time_zero}};
49   } else {
50     auto err_cap =
51         !mmap_metada.cap_user_time ? "cap_user_time" : "cap_user_time_zero";
52     std::string err_msg =
53         llvm::formatv("Can't get TSC to real time conversion values. "
54                       "perf_event capability '{0}' not supported.",
55                       err_cap);
56     return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg);
57   }
58 #else
59   std::string err_msg = "PERF_COUNT_SW_DUMMY requires Linux 3.12";
60   return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg);
61 #endif
62 }
63 
operator ()(void * ptr)64 void resource_handle::MmapDeleter::operator()(void *ptr) {
65   if (m_bytes && ptr != nullptr)
66     munmap(ptr, m_bytes);
67 }
68 
operator ()(long * ptr)69 void resource_handle::FileDescriptorDeleter::operator()(long *ptr) {
70   if (ptr == nullptr)
71     return;
72   if (*ptr == -1)
73     return;
74   close(*ptr);
75   std::default_delete<long>()(ptr);
76 }
77 
Init(perf_event_attr & attr,std::optional<lldb::pid_t> pid,std::optional<lldb::cpu_id_t> cpu,std::optional<long> group_fd,unsigned long flags)78 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr,
79                                           std::optional<lldb::pid_t> pid,
80                                           std::optional<lldb::cpu_id_t> cpu,
81                                           std::optional<long> group_fd,
82                                           unsigned long flags) {
83   errno = 0;
84   long fd = syscall(SYS_perf_event_open, &attr, pid.value_or(-1),
85                     cpu.value_or(-1), group_fd.value_or(-1), flags);
86   if (fd == -1) {
87     std::string err_msg =
88         llvm::formatv("perf event syscall failed: {0}", std::strerror(errno));
89     return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg);
90   }
91   return PerfEvent(fd, !attr.disabled);
92 }
93 
Init(perf_event_attr & attr,std::optional<lldb::pid_t> pid,std::optional<lldb::cpu_id_t> cpu)94 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr,
95                                           std::optional<lldb::pid_t> pid,
96                                           std::optional<lldb::cpu_id_t> cpu) {
97   return Init(attr, pid, cpu, -1, 0);
98 }
99 
100 llvm::Expected<resource_handle::MmapUP>
DoMmap(void * addr,size_t length,int prot,int flags,long int offset,llvm::StringRef buffer_name)101 PerfEvent::DoMmap(void *addr, size_t length, int prot, int flags,
102                   long int offset, llvm::StringRef buffer_name) {
103   errno = 0;
104   auto mmap_result = ::mmap(addr, length, prot, flags, GetFd(), offset);
105 
106   if (mmap_result == MAP_FAILED) {
107     std::string err_msg =
108         llvm::formatv("perf event mmap allocation failed for {0}: {1}",
109                       buffer_name, std::strerror(errno));
110     return createStringError(inconvertibleErrorCode(), err_msg);
111   }
112   return resource_handle::MmapUP(mmap_result, length);
113 }
114 
MmapMetadataAndDataBuffer(size_t num_data_pages,bool data_buffer_write)115 llvm::Error PerfEvent::MmapMetadataAndDataBuffer(size_t num_data_pages,
116                                                  bool data_buffer_write) {
117   size_t mmap_size = (num_data_pages + 1) * getpagesize();
118   if (Expected<resource_handle::MmapUP> mmap_metadata_data = DoMmap(
119           nullptr, mmap_size, PROT_READ | (data_buffer_write ? PROT_WRITE : 0),
120           MAP_SHARED, 0, "metadata and data buffer")) {
121     m_metadata_data_base = std::move(mmap_metadata_data.get());
122     return Error::success();
123   } else
124     return mmap_metadata_data.takeError();
125 }
126 
MmapAuxBuffer(size_t num_aux_pages)127 llvm::Error PerfEvent::MmapAuxBuffer(size_t num_aux_pages) {
128 #ifndef PERF_ATTR_SIZE_VER5
129   return createStringError(inconvertibleErrorCode(),
130                            "Intel PT Linux perf event not supported");
131 #else
132   if (num_aux_pages == 0)
133     return Error::success();
134 
135   perf_event_mmap_page &metadata_page = GetMetadataPage();
136 
137   metadata_page.aux_offset =
138       metadata_page.data_offset + metadata_page.data_size;
139   metadata_page.aux_size = num_aux_pages * getpagesize();
140 
141   if (Expected<resource_handle::MmapUP> mmap_aux =
142           DoMmap(nullptr, metadata_page.aux_size, PROT_READ, MAP_SHARED,
143                  metadata_page.aux_offset, "aux buffer")) {
144     m_aux_base = std::move(mmap_aux.get());
145     return Error::success();
146   } else
147     return mmap_aux.takeError();
148 #endif
149 }
150 
MmapMetadataAndBuffers(size_t num_data_pages,size_t num_aux_pages,bool data_buffer_write)151 llvm::Error PerfEvent::MmapMetadataAndBuffers(size_t num_data_pages,
152                                               size_t num_aux_pages,
153                                               bool data_buffer_write) {
154   if (num_data_pages != 0 && !isPowerOf2_64(num_data_pages))
155     return llvm::createStringError(
156         llvm::inconvertibleErrorCode(),
157         llvm::formatv("Number of data pages must be a power of 2, got: {0}",
158                       num_data_pages));
159   if (num_aux_pages != 0 && !isPowerOf2_64(num_aux_pages))
160     return llvm::createStringError(
161         llvm::inconvertibleErrorCode(),
162         llvm::formatv("Number of aux pages must be a power of 2, got: {0}",
163                       num_aux_pages));
164   if (Error err = MmapMetadataAndDataBuffer(num_data_pages, data_buffer_write))
165     return err;
166   if (Error err = MmapAuxBuffer(num_aux_pages))
167     return err;
168   return Error::success();
169 }
170 
GetFd() const171 long PerfEvent::GetFd() const { return *(m_fd.get()); }
172 
GetMetadataPage() const173 perf_event_mmap_page &PerfEvent::GetMetadataPage() const {
174   return *reinterpret_cast<perf_event_mmap_page *>(m_metadata_data_base.get());
175 }
176 
GetDataBuffer() const177 ArrayRef<uint8_t> PerfEvent::GetDataBuffer() const {
178 #ifndef PERF_ATTR_SIZE_VER5
179   llvm_unreachable("Intel PT Linux perf event not supported");
180 #else
181   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
182   return {reinterpret_cast<uint8_t *>(m_metadata_data_base.get()) +
183               mmap_metadata.data_offset,
184           static_cast<size_t>(mmap_metadata.data_size)};
185 #endif
186 }
187 
GetAuxBuffer() const188 ArrayRef<uint8_t> PerfEvent::GetAuxBuffer() const {
189 #ifndef PERF_ATTR_SIZE_VER5
190   llvm_unreachable("Intel PT Linux perf event not supported");
191 #else
192   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
193   return {reinterpret_cast<uint8_t *>(m_aux_base.get()),
194           static_cast<size_t>(mmap_metadata.aux_size)};
195 #endif
196 }
197 
GetReadOnlyDataBuffer()198 Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyDataBuffer() {
199   // The following code assumes that the protection level of the DATA page
200   // is PROT_READ. If PROT_WRITE is used, then reading would require that
201   // this piece of code updates some pointers. See more about data_tail
202   // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html.
203 
204 #ifndef PERF_ATTR_SIZE_VER5
205   return createStringError(inconvertibleErrorCode(),
206                            "Intel PT Linux perf event not supported");
207 #else
208   bool was_enabled = m_enabled;
209   if (Error err = DisableWithIoctl())
210     return std::move(err);
211 
212   /**
213    * The data buffer and aux buffer have different implementations
214    * with respect to their definition of head pointer when using PROD_READ only.
215    * In the case of Aux data buffer the head always wraps around the aux buffer
216    * and we don't need to care about it, whereas the data_head keeps
217    * increasing and needs to be wrapped by modulus operator
218    */
219   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
220 
221   ArrayRef<uint8_t> data = GetDataBuffer();
222   uint64_t data_head = mmap_metadata.data_head;
223   uint64_t data_size = mmap_metadata.data_size;
224   std::vector<uint8_t> output;
225   output.reserve(data.size());
226 
227   if (data_head > data_size) {
228     uint64_t actual_data_head = data_head % data_size;
229     // The buffer has wrapped, so we first the oldest chunk of data
230     output.insert(output.end(), data.begin() + actual_data_head, data.end());
231     // And we we read the most recent chunk of data
232     output.insert(output.end(), data.begin(), data.begin() + actual_data_head);
233   } else {
234     // There's been no wrapping, so we just read linearly
235     output.insert(output.end(), data.begin(), data.begin() + data_head);
236   }
237 
238   if (was_enabled) {
239     if (Error err = EnableWithIoctl())
240       return std::move(err);
241   }
242 
243   return output;
244 #endif
245 }
246 
GetReadOnlyAuxBuffer()247 Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyAuxBuffer() {
248   // The following code assumes that the protection level of the AUX page
249   // is PROT_READ. If PROT_WRITE is used, then reading would require that
250   // this piece of code updates some pointers. See more about aux_tail
251   // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html.
252 
253 #ifndef PERF_ATTR_SIZE_VER5
254   return createStringError(inconvertibleErrorCode(),
255                            "Intel PT Linux perf event not supported");
256 #else
257   bool was_enabled = m_enabled;
258   if (Error err = DisableWithIoctl())
259     return std::move(err);
260 
261   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
262 
263   ArrayRef<uint8_t> data = GetAuxBuffer();
264   uint64_t aux_head = mmap_metadata.aux_head;
265   std::vector<uint8_t> output;
266   output.reserve(data.size());
267 
268   /**
269    * When configured as ring buffer, the aux buffer keeps wrapping around
270    * the buffer and its not possible to detect how many times the buffer
271    * wrapped. Initially the buffer is filled with zeros,as shown below
272    * so in order to get complete buffer we first copy firstpartsize, followed
273    * by any left over part from beginning to aux_head
274    *
275    * aux_offset [d,d,d,d,d,d,d,d,0,0,0,0,0,0,0,0,0,0,0] aux_size
276    *                 aux_head->||<- firstpartsize  ->|
277    *
278    * */
279 
280   output.insert(output.end(), data.begin() + aux_head, data.end());
281   output.insert(output.end(), data.begin(), data.begin() + aux_head);
282 
283   if (was_enabled) {
284     if (Error err = EnableWithIoctl())
285       return std::move(err);
286   }
287 
288   return output;
289 #endif
290 }
291 
DisableWithIoctl()292 Error PerfEvent::DisableWithIoctl() {
293   if (!m_enabled)
294     return Error::success();
295 
296   if (ioctl(*m_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) < 0)
297     return createStringError(inconvertibleErrorCode(),
298                              "Can't disable perf event. %s",
299                              std::strerror(errno));
300 
301   m_enabled = false;
302   return Error::success();
303 }
304 
IsEnabled() const305 bool PerfEvent::IsEnabled() const { return m_enabled; }
306 
EnableWithIoctl()307 Error PerfEvent::EnableWithIoctl() {
308   if (m_enabled)
309     return Error::success();
310 
311   if (ioctl(*m_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) < 0)
312     return createStringError(inconvertibleErrorCode(),
313                              "Can't enable perf event. %s",
314                              std::strerror(errno));
315 
316   m_enabled = true;
317   return Error::success();
318 }
319 
GetEffectiveDataBufferSize() const320 size_t PerfEvent::GetEffectiveDataBufferSize() const {
321 #ifndef PERF_ATTR_SIZE_VER5
322   llvm_unreachable("Intel PT Linux perf event not supported");
323 #else
324   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
325   if (mmap_metadata.data_head < mmap_metadata.data_size)
326     return mmap_metadata.data_head;
327   else
328     return mmap_metadata.data_size; // The buffer has wrapped.
329 #endif
330 }
331 
332 Expected<PerfEvent>
CreateContextSwitchTracePerfEvent(lldb::cpu_id_t cpu_id,const PerfEvent * parent_perf_event)333 lldb_private::process_linux::CreateContextSwitchTracePerfEvent(
334     lldb::cpu_id_t cpu_id, const PerfEvent *parent_perf_event) {
335   Log *log = GetLog(POSIXLog::Trace);
336 #ifndef PERF_ATTR_SIZE_VER5
337   return createStringError(inconvertibleErrorCode(),
338                            "Intel PT Linux perf event not supported");
339 #else
340   perf_event_attr attr;
341   memset(&attr, 0, sizeof(attr));
342   attr.size = sizeof(attr);
343   attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
344   attr.type = PERF_TYPE_SOFTWARE;
345   attr.context_switch = 1;
346   attr.exclude_kernel = 1;
347   attr.sample_id_all = 1;
348   attr.exclude_hv = 1;
349   attr.disabled = parent_perf_event ? !parent_perf_event->IsEnabled() : false;
350 
351   // The given perf configuration will produce context switch records of 32
352   // bytes each. Assuming that every context switch will be emitted twice (one
353   // for context switch ins and another one for context switch outs), and that a
354   // context switch will happen at least every half a millisecond per core, we
355   // need 500 * 32 bytes (~16 KB) for a trace of one second, which is much more
356   // than what a regular intel pt trace can get. Pessimistically we pick as
357   // 32KiB for the size of our context switch trace.
358 
359   uint64_t data_buffer_size = 32768;
360   uint64_t data_buffer_numpages = data_buffer_size / getpagesize();
361 
362   LLDB_LOG(log, "Will create context switch trace buffer of size {0}",
363            data_buffer_size);
364 
365   std::optional<long> group_fd;
366   if (parent_perf_event)
367     group_fd = parent_perf_event->GetFd();
368 
369   if (Expected<PerfEvent> perf_event = PerfEvent::Init(
370           attr, /*pid=*/std::nullopt, cpu_id, group_fd, /*flags=*/0)) {
371     if (Error mmap_err = perf_event->MmapMetadataAndBuffers(
372             data_buffer_numpages, 0, /*data_buffer_write=*/false)) {
373       return std::move(mmap_err);
374     }
375     return perf_event;
376   } else {
377     return perf_event.takeError();
378   }
379 #endif
380 }
381