1 //===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a JITEventListener object that tells perf about JITted
10 // functions, including source line information.
11 //
12 // Documentation for perf jit integration is available at:
13 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
14 // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Config/config.h"
20 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
21 #include "llvm/ExecutionEngine/JITEventListener.h"
22 #include "llvm/Object/ObjectFile.h"
23 #include "llvm/Object/SymbolSize.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Errno.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Mutex.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Threading.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <mutex>
34 
35 #include <sys/mman.h>  // mmap()
36 #include <time.h>      // clock_gettime(), time(), localtime_r() */
37 #include <unistd.h>    // for read(), close()
38 
39 using namespace llvm;
40 using namespace llvm::object;
41 typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
42 
43 namespace {
44 
45 // language identifier (XXX: should we generate something better from debug
46 // info?)
47 #define JIT_LANG "llvm-IR"
48 #define LLVM_PERF_JIT_MAGIC                                                    \
49   ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 |            \
50    (uint32_t)'D')
51 #define LLVM_PERF_JIT_VERSION 1
52 
53 // bit 0: set if the jitdump file is using an architecture-specific timestamp
54 // clock source
55 #define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
56 
57 struct LLVMPerfJitHeader;
58 
59 class PerfJITEventListener : public JITEventListener {
60 public:
61   PerfJITEventListener();
62   ~PerfJITEventListener() {
63     if (MarkerAddr)
64       CloseMarker();
65   }
66 
67   void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj,
68                           const RuntimeDyld::LoadedObjectInfo &L) override;
69   void notifyFreeingObject(ObjectKey K) override;
70 
71 private:
72   bool InitDebuggingDir();
73   bool OpenMarker();
74   void CloseMarker();
75   static bool FillMachine(LLVMPerfJitHeader &hdr);
76 
77   void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
78                   uint64_t CodeSize);
79   void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
80 
81   // cache lookups
82   sys::Process::Pid Pid;
83 
84   // base directory for output data
85   std::string JitPath;
86 
87   // output data stream, closed via Dumpstream
88   int DumpFd = -1;
89 
90   // output data stream
91   std::unique_ptr<raw_fd_ostream> Dumpstream;
92 
93   // prevent concurrent dumps from messing up the output file
94   sys::Mutex Mutex;
95 
96   // perf mmap marker
97   void *MarkerAddr = NULL;
98 
99   // perf support ready
100   bool SuccessfullyInitialized = false;
101 
102   // identifier for functions, primarily to identify when moving them around
103   uint64_t CodeGeneration = 1;
104 };
105 
106 // The following are POD struct definitions from the perf jit specification
107 
108 enum LLVMPerfJitRecordType {
109   JIT_CODE_LOAD = 0,
110   JIT_CODE_MOVE = 1, // not emitted, code isn't moved
111   JIT_CODE_DEBUG_INFO = 2,
112   JIT_CODE_CLOSE = 3,          // not emitted, unnecessary
113   JIT_CODE_UNWINDING_INFO = 4, // not emitted
114 
115   JIT_CODE_MAX
116 };
117 
118 struct LLVMPerfJitHeader {
119   uint32_t Magic;     // characters "JiTD"
120   uint32_t Version;   // header version
121   uint32_t TotalSize; // total size of header
122   uint32_t ElfMach;   // elf mach target
123   uint32_t Pad1;      // reserved
124   uint32_t Pid;
125   uint64_t Timestamp; // timestamp
126   uint64_t Flags;     // flags
127 };
128 
129 // record prefix (mandatory in each record)
130 struct LLVMPerfJitRecordPrefix {
131   uint32_t Id; // record type identifier
132   uint32_t TotalSize;
133   uint64_t Timestamp;
134 };
135 
136 struct LLVMPerfJitRecordCodeLoad {
137   LLVMPerfJitRecordPrefix Prefix;
138 
139   uint32_t Pid;
140   uint32_t Tid;
141   uint64_t Vma;
142   uint64_t CodeAddr;
143   uint64_t CodeSize;
144   uint64_t CodeIndex;
145 };
146 
147 struct LLVMPerfJitDebugEntry {
148   uint64_t Addr;
149   int Lineno;  // source line number starting at 1
150   int Discrim; // column discriminator, 0 is default
151   // followed by null terminated filename, \xff\0 if same as previous entry
152 };
153 
154 struct LLVMPerfJitRecordDebugInfo {
155   LLVMPerfJitRecordPrefix Prefix;
156 
157   uint64_t CodeAddr;
158   uint64_t NrEntry;
159   // followed by NrEntry LLVMPerfJitDebugEntry records
160 };
161 
162 static inline uint64_t timespec_to_ns(const struct timespec *ts) {
163   const uint64_t NanoSecPerSec = 1000000000;
164   return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
165 }
166 
167 static inline uint64_t perf_get_timestamp(void) {
168   struct timespec ts;
169   int ret;
170 
171   ret = clock_gettime(CLOCK_MONOTONIC, &ts);
172   if (ret)
173     return 0;
174 
175   return timespec_to_ns(&ts);
176 }
177 
178 PerfJITEventListener::PerfJITEventListener()
179     : Pid(sys::Process::getProcessId()) {
180   // check if clock-source is supported
181   if (!perf_get_timestamp()) {
182     errs() << "kernel does not support CLOCK_MONOTONIC\n";
183     return;
184   }
185 
186   if (!InitDebuggingDir()) {
187     errs() << "could not initialize debugging directory\n";
188     return;
189   }
190 
191   std::string Filename;
192   raw_string_ostream FilenameBuf(Filename);
193   FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
194 
195   // Need to open ourselves, because we need to hand the FD to OpenMarker() and
196   // raw_fd_ostream doesn't expose the FD.
197   using sys::fs::openFileForWrite;
198   if (auto EC =
199           openFileForReadWrite(FilenameBuf.str(), DumpFd,
200 			       sys::fs::CD_CreateNew, sys::fs::OF_None)) {
201     errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
202            << EC.message() << "\n";
203     return;
204   }
205 
206   Dumpstream = std::make_unique<raw_fd_ostream>(DumpFd, true);
207 
208   LLVMPerfJitHeader Header = {0};
209   if (!FillMachine(Header))
210     return;
211 
212   // signal this process emits JIT information
213   if (!OpenMarker())
214     return;
215 
216   // emit dumpstream header
217   Header.Magic = LLVM_PERF_JIT_MAGIC;
218   Header.Version = LLVM_PERF_JIT_VERSION;
219   Header.TotalSize = sizeof(Header);
220   Header.Pid = Pid;
221   Header.Timestamp = perf_get_timestamp();
222   Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
223 
224   // Everything initialized, can do profiling now.
225   if (!Dumpstream->has_error())
226     SuccessfullyInitialized = true;
227 }
228 
229 void PerfJITEventListener::notifyObjectLoaded(
230     ObjectKey K, const ObjectFile &Obj,
231     const RuntimeDyld::LoadedObjectInfo &L) {
232 
233   if (!SuccessfullyInitialized)
234     return;
235 
236   OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
237   const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
238 
239   // Get the address of the object image for use as a unique identifier
240   std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
241 
242   // Use symbol info to iterate over functions in the object.
243   for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
244     SymbolRef Sym = P.first;
245     std::string SourceFileName;
246 
247     Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
248     if (!SymTypeOrErr) {
249       // There's not much we can with errors here
250       consumeError(SymTypeOrErr.takeError());
251       continue;
252     }
253     SymbolRef::Type SymType = *SymTypeOrErr;
254     if (SymType != SymbolRef::ST_Function)
255       continue;
256 
257     Expected<StringRef> Name = Sym.getName();
258     if (!Name) {
259       consumeError(Name.takeError());
260       continue;
261     }
262 
263     Expected<uint64_t> AddrOrErr = Sym.getAddress();
264     if (!AddrOrErr) {
265       consumeError(AddrOrErr.takeError());
266       continue;
267     }
268     uint64_t Size = P.second;
269     object::SectionedAddress Address;
270     Address.Address = *AddrOrErr;
271 
272     uint64_t SectionIndex = object::SectionedAddress::UndefSection;
273     if (auto SectOrErr = Sym.getSection())
274         if (*SectOrErr != Obj.section_end())
275             SectionIndex = SectOrErr.get()->getIndex();
276 
277     // According to spec debugging info has to come before loading the
278     // corresonding code load.
279     DILineInfoTable Lines = Context->getLineInfoForAddressRange(
280         {*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath);
281 
282     NotifyDebug(*AddrOrErr, Lines);
283     NotifyCode(Name, *AddrOrErr, Size);
284   }
285 
286   // avoid races with writes
287   std::lock_guard<sys::Mutex> Guard(Mutex);
288 
289   Dumpstream->flush();
290 }
291 
292 void PerfJITEventListener::notifyFreeingObject(ObjectKey K) {
293   // perf currently doesn't have an interface for unloading. But munmap()ing the
294   // code section does, so that's ok.
295 }
296 
297 bool PerfJITEventListener::InitDebuggingDir() {
298   time_t Time;
299   struct tm LocalTime;
300   char TimeBuffer[sizeof("YYYYMMDD")];
301   SmallString<64> Path;
302 
303   // search for location to dump data to
304   if (const char *BaseDir = getenv("JITDUMPDIR"))
305     Path.append(BaseDir);
306   else if (!sys::path::home_directory(Path))
307     Path = ".";
308 
309   // create debug directory
310   Path += "/.debug/jit/";
311   if (auto EC = sys::fs::create_directories(Path)) {
312     errs() << "could not create jit cache directory " << Path << ": "
313            << EC.message() << "\n";
314     return false;
315   }
316 
317   // create unique directory for dump data related to this process
318   time(&Time);
319   localtime_r(&Time, &LocalTime);
320   strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
321   Path += JIT_LANG "-jit-";
322   Path += TimeBuffer;
323 
324   SmallString<128> UniqueDebugDir;
325 
326   using sys::fs::createUniqueDirectory;
327   if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
328     errs() << "could not create unique jit cache directory " << UniqueDebugDir
329            << ": " << EC.message() << "\n";
330     return false;
331   }
332 
333   JitPath = std::string(UniqueDebugDir.str());
334 
335   return true;
336 }
337 
338 bool PerfJITEventListener::OpenMarker() {
339   // We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
340   // is captured either live (perf record running when we mmap) or in deferred
341   // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
342   // file for more meta data info about the jitted code. Perf report/annotate
343   // detect this special filename and process the jitdump file.
344   //
345   // Mapping must be PROT_EXEC to ensure it is captured by perf record
346   // even when not using -d option.
347   MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(),
348                       PROT_READ | PROT_EXEC, MAP_PRIVATE, DumpFd, 0);
349 
350   if (MarkerAddr == MAP_FAILED) {
351     errs() << "could not mmap JIT marker\n";
352     return false;
353   }
354   return true;
355 }
356 
357 void PerfJITEventListener::CloseMarker() {
358   if (!MarkerAddr)
359     return;
360 
361   munmap(MarkerAddr, sys::Process::getPageSizeEstimate());
362   MarkerAddr = nullptr;
363 }
364 
365 bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
366   char id[16];
367   struct {
368     uint16_t e_type;
369     uint16_t e_machine;
370   } info;
371 
372   size_t RequiredMemory = sizeof(id) + sizeof(info);
373 
374   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
375     MemoryBuffer::getFileSlice("/proc/self/exe",
376 			       RequiredMemory,
377 			       0);
378 
379   // This'll not guarantee that enough data was actually read from the
380   // underlying file. Instead the trailing part of the buffer would be
381   // zeroed. Given the ELF signature check below that seems ok though,
382   // it's unlikely that the file ends just after that, and the
383   // consequence would just be that perf wouldn't recognize the
384   // signature.
385   if (auto EC = MB.getError()) {
386     errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
387     return false;
388   }
389 
390   memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
391   memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
392 
393   // check ELF signature
394   if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
395     errs() << "invalid elf signature\n";
396     return false;
397   }
398 
399   hdr.ElfMach = info.e_machine;
400 
401   return true;
402 }
403 
404 void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
405                                       uint64_t CodeAddr, uint64_t CodeSize) {
406   assert(SuccessfullyInitialized);
407 
408   // 0 length functions can't have samples.
409   if (CodeSize == 0)
410     return;
411 
412   LLVMPerfJitRecordCodeLoad rec;
413   rec.Prefix.Id = JIT_CODE_LOAD;
414   rec.Prefix.TotalSize = sizeof(rec) +        // debug record itself
415                          Symbol->size() + 1 + // symbol name
416                          CodeSize;            // and code
417   rec.Prefix.Timestamp = perf_get_timestamp();
418 
419   rec.CodeSize = CodeSize;
420   rec.Vma = 0;
421   rec.CodeAddr = CodeAddr;
422   rec.Pid = Pid;
423   rec.Tid = get_threadid();
424 
425   // avoid interspersing output
426   std::lock_guard<sys::Mutex> Guard(Mutex);
427 
428   rec.CodeIndex = CodeGeneration++; // under lock!
429 
430   Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
431   Dumpstream->write(Symbol->data(), Symbol->size() + 1);
432   Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
433 }
434 
435 void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
436                                        DILineInfoTable Lines) {
437   assert(SuccessfullyInitialized);
438 
439   // Didn't get useful debug info.
440   if (Lines.empty())
441     return;
442 
443   LLVMPerfJitRecordDebugInfo rec;
444   rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
445   rec.Prefix.TotalSize = sizeof(rec); // will be increased further
446   rec.Prefix.Timestamp = perf_get_timestamp();
447   rec.CodeAddr = CodeAddr;
448   rec.NrEntry = Lines.size();
449 
450   // compute total size size of record (variable due to filenames)
451   DILineInfoTable::iterator Begin = Lines.begin();
452   DILineInfoTable::iterator End = Lines.end();
453   for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
454     DILineInfo &line = It->second;
455     rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
456     rec.Prefix.TotalSize += line.FileName.size() + 1;
457   }
458 
459   // The debug_entry describes the source line information. It is defined as
460   // follows in order:
461   // * uint64_t code_addr: address of function for which the debug information
462   // is generated
463   // * uint32_t line     : source file line number (starting at 1)
464   // * uint32_t discrim  : column discriminator, 0 is default
465   // * char name[n]      : source file name in ASCII, including null termination
466 
467   // avoid interspersing output
468   std::lock_guard<sys::Mutex> Guard(Mutex);
469 
470   Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
471 
472   for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
473     LLVMPerfJitDebugEntry LineInfo;
474     DILineInfo &Line = It->second;
475 
476     LineInfo.Addr = It->first;
477     // The function re-created by perf is preceded by a elf
478     // header. Need to adjust for that, otherwise the results are
479     // wrong.
480     LineInfo.Addr += 0x40;
481     LineInfo.Lineno = Line.Line;
482     LineInfo.Discrim = Line.Discriminator;
483 
484     Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
485                       sizeof(LineInfo));
486     Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
487   }
488 }
489 
490 } // end anonymous namespace
491 
492 namespace llvm {
493 JITEventListener *JITEventListener::createPerfJITEventListener() {
494   // There should be only a single event listener per process, otherwise perf
495   // gets confused.
496   static PerfJITEventListener PerfListener;
497   return &PerfListener;
498 }
499 
500 } // namespace llvm
501 
502 LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void)
503 {
504   return wrap(JITEventListener::createPerfJITEventListener());
505 }
506