1 /****************************************************************************
2 **
3 ** Copyright (C) 2015 The Qt Company Ltd
4 ** All rights reserved.
5 ** For any questions to The Qt Company, please use contact form at http://www.qt.io/contact-us
6 **
7 ** This file is part of the Qt Enterprise Perf Profiler Add-on.
8 **
9 ** GNU General Public License Usage
10 ** This file may be used under the terms of the GNU General Public License
11 ** version 3 as published by the Free Software Foundation and appearing in
12 ** the file LICENSE.GPLv3 included in the packaging of this file. Please
13 ** review the following information to ensure the GNU General Public License
14 ** requirements will be met: https://www.gnu.org/licenses/gpl.html.
15 **
16 ** If you have questions regarding the use of this file, please use
17 ** contact form at http://www.qt.io/contact-us
18 **
19 ****************************************************************************/
20 
21 #include "perfregisterinfo.h"
22 #include "perfsymboltable.h"
23 #include "perfunwind.h"
24 
25 #include <QDebug>
26 #include <QDir>
27 #include <QVersionNumber>
28 #include <QtEndian>
29 
30 #include <cstring>
31 
// Synthetic pid used as the key for the kernel's symbol table in m_symbolTables.
const qint32 PerfUnwind::s_kernelPid = -1;
33 
qHash(const PerfUnwind::Location & location,uint seed)34 uint qHash(const PerfUnwind::Location &location, uint seed)
35 {
36     QtPrivate::QHashCombine hash;
37     seed = hash(seed, location.address);
38     seed = hash(seed, location.relAddr);
39     seed = hash(seed, location.file);
40     seed = hash(seed, location.pid);
41     seed = hash(seed, location.line);
42     seed = hash(seed, location.column);
43     return seed;
44 }
45 
operator ==(const PerfUnwind::Location & a,const PerfUnwind::Location & b)46 bool operator==(const PerfUnwind::Location &a, const PerfUnwind::Location &b)
47 {
48     return a.address == b.address && a.relAddr == b.relAddr && a.file == b.file && a.pid == b.pid && a.line == b.line
49             && a.column == b.column;
50 }
51 
addEventTime(quint64 time)52 void PerfUnwind::Stats::addEventTime(quint64 time)
53 {
54     if (time && time < maxTime)
55         maxReorderTime = std::max(maxReorderTime, maxTime - time);
56     else
57         maxTime = time;
58 }
59 
finishedRound()60 void PerfUnwind::Stats::finishedRound()
61 {
62     numSamples += numSamplesInRound;
63     numMmaps += numMmapsInRound;
64 
65     maxSamplesPerRound = std::max(maxSamplesPerRound, numSamplesInRound);
66     maxMmapsPerRound = std::max(maxMmapsPerRound, numMmapsInRound);
67     maxTaskEventsPerRound = std::max(maxTaskEventsPerRound, numTaskEventsInRound);
68     numSamplesInRound = 0;
69     numMmapsInRound = 0;
70     numTaskEventsInRound = 0;
71     ++numRounds;
72 
73     maxTotalEventSizePerRound = std::max(maxTotalEventSizePerRound,
74                                          totalEventSizePerRound);
75     totalEventSizePerRound = 0;
76 
77     if (lastRoundTime > 0)
78         maxTimeBetweenRounds = std::max(maxTimeBetweenRounds, maxTime - lastRoundTime);
79 
80     lastRoundTime = maxTime;
81 }
82 
find_debuginfo(Dwfl_Module * module,void ** userData,const char * moduleName,Dwarf_Addr base,const char * file,const char * debugLink,GElf_Word crc,char ** debugInfoFilename)83 static int find_debuginfo(Dwfl_Module *module, void **userData, const char *moduleName,
84                           Dwarf_Addr base, const char *file, const char *debugLink,
85                           GElf_Word crc, char **debugInfoFilename)
86 {
87     // data should have been set from PerfSymbolTable::reportElf
88     Q_ASSERT(*userData);
89     auto* symbolTable = reinterpret_cast<PerfSymbolTable*>(*userData);
90     return symbolTable->findDebugInfo(module, moduleName, base, file, debugLink, crc, debugInfoFilename);
91 }
92 
defaultDebugInfoPath()93 QString PerfUnwind::defaultDebugInfoPath()
94 {
95     return QString::fromLatin1("%1usr%1lib%1debug%2%3%1.debug%2.debug")
96             .arg(QDir::separator(), QDir::listSeparator(), QDir::homePath());
97 }
98 
defaultKallsymsPath()99 QString PerfUnwind::defaultKallsymsPath()
100 {
101     return QString::fromLatin1("%1proc%1kallsyms").arg(QDir::separator());
102 }
103 
// Sets up unwinding state and, unless we are only collecting statistics,
// writes the output stream header (magic string + QDataStream version).
PerfUnwind::PerfUnwind(QIODevice *output, const QString &systemRoot, const QString &debugPath,
                       const QString &extraLibsPath, const QString &appPath, bool printStats) :
    m_output(output), m_architecture(PerfRegisterInfo::ARCH_INVALID), m_systemRoot(systemRoot),
    m_extraLibsPath(extraLibsPath), m_appPath(appPath), m_debugPath(debugPath),
    m_kallsymsPath(QDir::rootPath() + defaultKallsymsPath()), m_ignoreKallsymsBuildId(false),
    m_lastEventBufferSize(1 << 20), m_maxEventBufferSize(1 << 30), m_targetEventBufferSize(1 << 25),
    m_eventBufferSize(0), m_timeOrderViolations(0), m_lastFlushMaxTime(0)
{
    m_stats.enabled = printStats;
    m_currentUnwind.unwind = this;
    m_offlineCallbacks.find_elf = dwfl_build_id_find_elf;
    m_offlineCallbacks.find_debuginfo = find_debuginfo;
    m_offlineCallbacks.section_address = dwfl_offline_section_address;
    // elfutils expects the debuginfo search path as a mutable NUL-terminated
    // C string reachable through a char**, so copy the joined path list into
    // a heap buffer owned by this object (freed in the destructor).
    const QChar separator = QDir::listSeparator();
    QByteArray newDebugInfo = (separator + debugPath + separator + appPath + separator
                               + extraLibsPath + separator + systemRoot).toUtf8();
    Q_ASSERT(newDebugInfo.length() >= 0);
    const uint debugInfoLength = static_cast<uint>(newDebugInfo.length());
    m_debugInfoPath = new char[debugInfoLength + 1];
    m_debugInfoPath[debugInfoLength] = 0;
    std::memcpy(m_debugInfoPath, newDebugInfo.data(), debugInfoLength);
    m_offlineCallbacks.debuginfo_path = &m_debugInfoPath;

    if (!printStats) {
        // Write minimal header, consisting of magic and data stream version we're going to use.
        const char magic[] = "QPERFSTREAM";
        output->write(magic, sizeof(magic));
        qint32 dataStreamVersion = qToLittleEndian(qint32(QDataStream::Qt_DefaultCompiledVersion));
        output->write(reinterpret_cast<const char *>(&dataStreamVersion), sizeof(qint32));
    }
}
135 
// Flushes all buffered events, releases the elfutils search path buffer and
// the per-process symbol tables, and in stats mode dumps the collected
// counters as plain text to the output device.
PerfUnwind::~PerfUnwind()
{
    finalize();

    delete[] m_debugInfoPath;
    qDeleteAll(m_symbolTables);

    if (m_stats.enabled) {
        QTextStream out(m_output);
        out << "samples: " << m_stats.numSamples << "\n";
        out << "mmaps: " << m_stats.numMmaps << "\n";
        out << "rounds: " << m_stats.numRounds << "\n";
        out << "buffer flushes: " << m_stats.numBufferFlushes << "\n";
        out << "samples time violations: " << m_stats.numTimeViolatingSamples << "\n";
        out << "mmaps time violations: " << m_stats.numTimeViolatingMmaps << "\n";
        out << "max samples per round: " << m_stats.maxSamplesPerRound << "\n";
        out << "max mmaps per round: " << m_stats.maxMmapsPerRound << "\n";
        out << "max task events per round: " << m_stats.maxTaskEventsPerRound << "\n";
        out << "max samples per flush: " << m_stats.maxSamplesPerFlush << "\n";
        out << "max mmaps per flush: " << m_stats.maxMmapsPerFlush << "\n";
        out << "max task events per flush: " << m_stats.maxTaskEventsPerFlush << "\n";
        out << "max buffer size: " << m_stats.maxBufferSize << "\n";
        out << "max total event size per round: " << m_stats.maxTotalEventSizePerRound << "\n";
        out << "max time: " << m_stats.maxTime << "\n";
        out << "max time between rounds: " << m_stats.maxTimeBetweenRounds << "\n";
        out << "max reorder time: " << m_stats.maxReorderTime << "\n";
    }
}
164 
setMaxEventBufferSize(uint size)165 void PerfUnwind::setMaxEventBufferSize(uint size)
166 {
167     m_maxEventBufferSize = size;
168     if (size < m_targetEventBufferSize)
169         setTargetEventBufferSize(size);
170 }
171 
setTargetEventBufferSize(uint size)172 void PerfUnwind::setTargetEventBufferSize(uint size)
173 {
174     m_lastEventBufferSize = m_targetEventBufferSize;
175     m_targetEventBufferSize = size;
176     if (size > m_maxEventBufferSize)
177         setMaxEventBufferSize(size);
178 }
179 
// Restore the target saved by the previous setTargetEventBufferSize() call.
void PerfUnwind::revertTargetEventBufferSize()
{
    setTargetEventBufferSize(m_lastEventBufferSize);
}
184 
hasTracePointAttributes() const185 bool PerfUnwind::hasTracePointAttributes() const
186 {
187     for (auto &attributes : m_attributes) {
188         if (attributes.type() == PerfEventAttributes::TYPE_TRACEPOINT)
189             return true;
190     }
191     return false;
192 }
193 
symbolTable(qint32 pid)194 PerfSymbolTable *PerfUnwind::symbolTable(qint32 pid)
195 {
196     PerfSymbolTable *&symbolTable = m_symbolTables[pid];
197     if (!symbolTable)
198         symbolTable = new PerfSymbolTable(pid, &m_offlineCallbacks, this);
199     return symbolTable;
200 }
201 
// Queue an mmap record; it is applied to the symbol tables in time order
// when the event buffer is flushed.
void PerfUnwind::registerElf(const PerfRecordMmap &mmap)
{
    bufferEvent(mmap, &m_mmapBuffer, &m_stats.numMmapsInRound);
}
206 
sendBuffer(const QByteArray & buffer)207 void PerfUnwind::sendBuffer(const QByteArray &buffer)
208 {
209     if (m_stats.enabled)
210         return;
211 
212     qint32 size = qToLittleEndian(buffer.length());
213     m_output->write(reinterpret_cast<char *>(&size), sizeof(quint32));
214     m_output->write(buffer);
215 }
216 
// Handle a PERF_RECORD_COMM event: intern the command name and buffer a
// Command task event for time-ordered processing.
void PerfUnwind::comm(const PerfRecordComm &comm)
{
    const qint32 commId = resolveString(comm.comm());

    bufferEvent(TaskEvent{comm.pid(), comm.tid(), comm.time(), comm.cpu(),
                          Command, commId},
                &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
}
225 
// Handle a PERF_RECORD_ATTR event by registering its attributes under the
// record's sample IDs.
void PerfUnwind::attr(const PerfRecordAttr &attr)
{
    addAttributes(attr.attr(), attr.attr().name(), attr.ids());
}
230 
addAttributes(const PerfEventAttributes & attributes,const QByteArray & name,const QList<quint64> & ids)231 void PerfUnwind::addAttributes(const PerfEventAttributes &attributes, const QByteArray &name,
232                               const QList<quint64> &ids)
233 {
234     auto filteredIds = ids;
235     // If we only get one attribute, it doesn't have an ID.
236     // The default ID for samples is 0, so we assign that here,
237     // in order to look it up in analyze().
238     filteredIds << 0;
239 
240     {
241         // remove attributes that are known already
242         auto it = std::remove_if(filteredIds.begin(), filteredIds.end(),
243                                  [this] (quint64 id) {
244                                      return m_attributeIds.contains(id);
245                                 });
246         filteredIds.erase(it, filteredIds.end());
247     }
248 
249     // Switch to dynamic buffering if it's a trace point
250     if (attributes.type() == PerfEventAttributes::TYPE_TRACEPOINT && m_targetEventBufferSize == 0) {
251         qDebug() << "Trace point attributes detected. Switching to dynamic buffering";
252         revertTargetEventBufferSize();
253     }
254 
255     if (filteredIds.isEmpty())
256         return;
257 
258     const qint32 internalId = m_attributes.size();
259     m_attributes.append(attributes);
260     sendAttributes(internalId, attributes, name);
261 
262     foreach (quint64 id, filteredIds)
263         m_attributeIds[id] = internalId;
264 }
265 
sendAttributes(qint32 id,const PerfEventAttributes & attributes,const QByteArray & name)266 void PerfUnwind::sendAttributes(qint32 id, const PerfEventAttributes &attributes, const QByteArray &name)
267 {
268     const qint32 attrNameId = resolveString(name);
269 
270     QByteArray buffer;
271     QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(AttributesDefinition)
272                                                << id << attributes.type()
273                                                << attributes.config() << attrNameId
274                                                << attributes.usesFrequency() << attributes.frequenyOrPeriod();
275     sendBuffer(buffer);
276 }
277 
sendEventFormat(qint32 id,const EventFormat & format)278 void PerfUnwind::sendEventFormat(qint32 id, const EventFormat &format)
279 {
280     const qint32 systemId = resolveString(format.system);
281     const qint32 nameId = resolveString(format.name);
282 
283     for (const FormatField &field : format.commonFields)
284         resolveString(field.name);
285 
286     for (const FormatField &field : format.fields)
287         resolveString(field.name);
288 
289     QByteArray buffer;
290     QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(TracePointFormat) << id
291                                                << systemId << nameId << format.flags;
292     sendBuffer(buffer);
293 }
294 
// Handle a PERF_RECORD_LOST event: buffer a LostDefinition task event
// carrying the number of lost events as its payload.
void PerfUnwind::lost(const PerfRecordLost &lost)
{
    bufferEvent(TaskEvent{lost.pid(), lost.tid(), lost.time(), lost.cpu(),
                          LostDefinition, lost.lost()},
                &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
}
301 
// Process the perf.data feature section: tracing data, per-event attribute
// descriptions, and host metadata which is forwarded as one
// FeaturesDefinition record. Also records build IDs for later binary lookup.
void PerfUnwind::features(const PerfFeatures &features)
{
    tracing(features.tracingData());

    const auto &eventDescs = features.eventDesc().eventDescs;
    for (const auto &desc : eventDescs)
        addAttributes(desc.attrs, desc.name, desc.ids);

    // With perf >= 3.17 switch to automatic buffering (target size 0), but
    // only if no trace point attributes are present and no time order
    // violations have been seen so far.
    const auto perfVersion = QVersionNumber::fromString(QString::fromLatin1(features.version()));
    if (perfVersion >= QVersionNumber(3, 17) && m_timeOrderViolations == 0) {
        if (!hasTracePointAttributes()) {
            qDebug() << "Linux version" << features.version()
                     << "detected. Switching to automatic buffering.";
            setTargetEventBufferSize(0);
        }
    }

    // NOTE: the field order below is the wire format the client expects;
    // do not reorder.
    QByteArray buffer;
    QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(FeaturesDefinition)
                                               << features.hostName()
                                               << features.osRelease()
                                               << features.version()
                                               << features.architecture()
                                               << features.nrCpus()
                                               << features.cpuDesc()
                                               << features.cpuId()
                                               << features.totalMem()
                                               << features.cmdline()
                                               << features.buildIds()
                                               << features.cpuTopology()
                                               << features.numaTopology()
                                               << features.pmuMappings()
                                               << features.groupDescs();
    sendBuffer(buffer);

    // Remember build IDs so the matching binaries can be located later.
    const auto buildIds = features.buildIds();
    m_buildIds.reserve(buildIds.size());
    for (const auto &buildId : buildIds) {
        m_buildIds[buildId.fileName] = buildId.id;
    }
}
343 
tracing(const PerfTracingData & tracingData)344 void PerfUnwind::tracing(const PerfTracingData &tracingData)
345 {
346     m_tracingData = tracingData;
347     const auto &formats = tracingData.eventFormats();
348     for (auto it = formats.constBegin(), end = formats.constEnd(); it != end; ++it)
349         sendEventFormat(it.key(), it.value());
350 }
351 
ipIsInKernelSpace(quint64 ip) const352 bool PerfUnwind::ipIsInKernelSpace(quint64 ip) const
353 {
354     auto symbolTableIt = m_symbolTables.constFind(s_kernelPid);
355     if (symbolTableIt == m_symbolTables.constEnd())
356         return false;
357 
358     return symbolTableIt.value()->containsAddress(ip);
359 }
360 
// Serialize a Location. The field order is the wire format the client
// expects; do not reorder.
QDataStream &operator<<(QDataStream &stream, const PerfUnwind::Location &location)
{
    return stream << location.address << location.file << location.pid << location.line
                  << location.column << location.parentLocationId << location.relAddr;
}
366 
// Serialize a Symbol. The field order is the wire format the client
// expects; do not reorder.
QDataStream &operator<<(QDataStream &stream, const PerfUnwind::Symbol &symbol)
{
    return stream << symbol.name << symbol.binary << symbol.path << symbol.isKernel << symbol.relAddr << symbol.size << symbol.actualPath;
}
371 
// Callback for dwfl_getthread_frames: resolves one stack frame and appends
// it to the current unwind state. Returns DWARF_CB_ABORT to stop unwinding
// (on error, frame limit, null pc, or a dirtied symbol cache).
static int frameCallback(Dwfl_Frame *state, void *arg)
{
    Dwarf_Addr pc = 0;
    PerfUnwind::UnwindInfo *ui = static_cast<PerfUnwind::UnwindInfo *>(arg);

    // do not query for activation directly, as this could potentially advance
    // the unwinder internally - we must first ensure the module for the pc
    // is reported
    if (!dwfl_frame_pc(state, &pc, nullptr)
            || (ui->maxFrames != -1 && ui->frames.length() > ui->maxFrames)
            || pc == 0) {
        // Mark everything from here on as guessed, so the client can show it
        // as unreliable.
        ui->firstGuessedFrame = ui->frames.length();
        qWarning() << dwfl_errmsg(dwfl_errno()) << ui->firstGuessedFrame;
        return DWARF_CB_ABORT;
    }

    auto* symbolTable = ui->unwind->symbolTable(ui->sample->pid());

    // ensure the module is reported
    // if that fails, we will still try to unwind based on frame pointer
    symbolTable->module(pc);

    // now we can query for the activation flag
    bool isactivation = false;
    dwfl_frame_pc(state, &pc, &isactivation);
    // For non-activation frames pc is a return address; subtract 1 so the
    // lookup hits the call site instead of the instruction after it.
    Dwarf_Addr pc_adjusted = pc - (isactivation ? 0 : 1);

    // isKernel = false as unwinding generally only works on user code
    bool isInterworking = false;
    const auto frame = symbolTable->lookupFrame(pc_adjusted, false, &isInterworking);
    if (symbolTable->cacheIsDirty())
        return DWARF_CB_ABORT;

    ui->frames.append(frame);
    if (isInterworking && ui->frames.length() == 1)
        ui->isInterworking = true;
    return DWARF_CB_OK;
}
410 
// DWARF-unwind the current sample's user stack via elfutils. If the first
// frame turned out to be an ARM interworking veneer, retry with LR as the
// initial IP and keep whichever attempt yielded the longer stack trace.
void PerfUnwind::unwindStack()
{
    Dwfl *dwfl = symbolTable(m_currentUnwind.sample->pid())->attachDwfl(&m_currentUnwind);
    if (!dwfl)
        return;

    dwfl_getthread_frames(dwfl, m_currentUnwind.sample->pid(), frameCallback, &m_currentUnwind);
    if (m_currentUnwind.isInterworking) {
        QVector<qint32> savedFrames = m_currentUnwind.frames;

        // If it's an ARM interworking veneer, we assume that we can find a return address in LR and
        // no stack has been used for the veneer itself.
        // The reasoning is that any symbol jumped to by the veneer has to work with or without
        // using the veneer. It needs a valid return address and when it returns the stack pointer
        // must be the same in both cases. Thus, the veneer cannot touch the stack pointer and there
        // has to be a return address in LR, provided by the caller.
        // So, just try again, and make setInitialRegisters use LR for IP.
        m_currentUnwind.frames.resize(1); // Keep the actual veneer frame
        dwfl_getthread_frames(dwfl, m_currentUnwind.sample->pid(), frameCallback, &m_currentUnwind);

        // If the LR trick didn't result in a longer stack trace than the regular unwinding, just
        // revert it.
        if (savedFrames.length() > m_currentUnwind.frames.length())
            m_currentUnwind.frames.swap(savedFrames);
    }
}
437 
// Resolve the current sample's callchain into frame IDs, switching between
// the kernel and user symbol tables whenever a PERF_CONTEXT_* marker changes
// the context. When a branch stack is present it supplies the user-space
// frames instead of the callchain's user portion.
void PerfUnwind::resolveCallchain()
{
    bool isKernel = false;
    bool addedUserFrames = false;
    PerfSymbolTable *symbols = symbolTable(m_currentUnwind.sample->pid());

    // Look up one instruction pointer in the currently active symbol table
    // and append the resulting frame. Returns false if the lookup dirtied
    // the symbol cache, in which case the caller (analyze()) clears the
    // cache and retries the whole resolution.
    auto reportIp = [&](quint64 ip) -> bool {
        symbols->attachDwfl(&m_currentUnwind);
        m_currentUnwind.frames.append(symbols->lookupFrame(ip, isKernel,
                                            &m_currentUnwind.isInterworking));
        return !symbols->cacheIsDirty();
    };

    // when we have a non-empty branch stack, we need to skip any non-kernel IPs
    // in the normal callchain. The branch stack contains the non-kernel IPs then.
    const bool hasBranchStack = !m_currentUnwind.sample->branchStack().isEmpty();

    for (int i = 0, c = m_currentUnwind.sample->callchain().size(); i < c; ++i) {
        quint64 ip = m_currentUnwind.sample->callchain()[i];

        if (ip > PERF_CONTEXT_MAX) {
            // Context marker, not a real address: switch symbol tables.
            switch (ip) {
            case PERF_CONTEXT_HV: // hypervisor
            case PERF_CONTEXT_KERNEL:
                if (!isKernel) {
                    symbols = symbolTable(s_kernelPid);
                    isKernel = true;
                }
                break;
            case PERF_CONTEXT_USER:
                if (isKernel) {
                    symbols = symbolTable(m_currentUnwind.sample->pid());
                    isKernel = false;
                }
                break;
            default:
                qWarning() << "invalid callchain context" << Qt::hex << ip;
                return;
            }
        } else {
            // prefer user frames from branch stack if available
            if (hasBranchStack && !isKernel)
                break;

            // sometimes it skips the first user frame.
            if (!addedUserFrames && !isKernel && ip != m_currentUnwind.sample->ip()) {
                if (!reportIp(m_currentUnwind.sample->ip()))
                    return;
            }

            if (!reportIp(ip))
                return;

            if (!isKernel)
                addedUserFrames = true;
        }
    }

    // when we are still in the kernel, we cannot have a meaningful branch stack
    if (isKernel)
        return;

    // if available, also resolve the callchain stored in the branch stack:
    // caller is stored in "from", callee is stored in "to"
    // so the branch is made up of the first callee and all callers
    for (int i = 0, c = m_currentUnwind.sample->branchStack().size(); i < c; ++i) {
        const auto& entry = m_currentUnwind.sample->branchStack()[i];
        if (i == 0 && !reportIp(entry.to))
            return;
        if (!reportIp(entry.from))
            return;
    }
}
511 
// Buffer a sample record for time-ordered processing; the actual unwinding
// happens in analyze() when the buffer is flushed.
void PerfUnwind::sample(const PerfRecordSample &sample)
{
    bufferEvent(sample, &m_sampleBuffer, &m_stats.numSamplesInRound);
}
516 
517 template<typename Number>
readFromArray(const QByteArray & data,quint32 offset,bool byteSwap)518 Number readFromArray(const QByteArray &data, quint32 offset, bool byteSwap)
519 {
520     const Number number = *reinterpret_cast<const Number *>(data.data() + offset);
521     return byteSwap ? qbswap(number) : number;
522 }
523 
readTraceItem(const QByteArray & data,quint32 offset,quint32 size,bool isSigned,bool byteSwap)524 QVariant readTraceItem(const QByteArray &data, quint32 offset, quint32 size, bool isSigned,
525                        bool byteSwap)
526 {
527     if (isSigned) {
528         switch (size) {
529         case 1: return readFromArray<qint8>(data, offset, byteSwap);
530         case 2: return readFromArray<qint16>(data, offset, byteSwap);
531         case 4: return readFromArray<qint32>(data, offset, byteSwap);
532         case 8: return readFromArray<qint64>(data, offset, byteSwap);
533         default: return QVariant::Invalid;
534         }
535     } else {
536         switch (size) {
537         case 1: return readFromArray<quint8>(data, offset, byteSwap);
538         case 2: return readFromArray<quint16>(data, offset, byteSwap);
539         case 4: return readFromArray<quint32>(data, offset, byteSwap);
540         case 8: return readFromArray<quint64>(data, offset, byteSwap);
541         default: return QVariant::Invalid;
542         }
543     }
544 }
545 
// Decode one trace field from the raw sample payload into a QVariant,
// handling dynamic fields (offset/size packed into one 32-bit word), string
// fields, and fixed-size arrays. Out-of-bounds fields yield an invalid
// QVariant.
QVariant PerfUnwind::readTraceData(const QByteArray &data, const FormatField &field, bool byteSwap)
{
    // TODO: validate that it actually works like this.
    // Reject fields whose offset or size exceed the int-based QByteArray API
    // or that extend past the end of the payload.
    if (field.offset > quint32(std::numeric_limits<int>::max())
            || field.size > quint32(std::numeric_limits<int>::max())
            || field.offset + field.size > quint32(std::numeric_limits<int>::max())
            || static_cast<int>(field.offset + field.size) > data.length()) {
        return QVariant::Invalid;
    }

    if (field.flags & FIELD_IS_ARRAY) {
        if (field.flags & FIELD_IS_DYNAMIC) {
            // Dynamic arrays store the real location inline: low 16 bits are
            // the offset, high 16 bits the size. Re-read with those resolved.
            const quint32 dynamicOffsetAndSize = readTraceItem(data, field.offset, field.size,
                                                               false, byteSwap).toUInt();
            FormatField newField = field;
            newField.offset = dynamicOffsetAndSize & 0xffff;
            newField.size = dynamicOffsetAndSize >> 16;
            newField.flags = field.flags & (~FIELD_IS_DYNAMIC);
            return readTraceData(data, newField, byteSwap);
        }
        if (field.flags & FIELD_IS_STRING) {
            // Strings are returned as raw bytes, not byte-swapped.
            return data.mid(static_cast<int>(field.offset), static_cast<int>(field.size));
        } else {
            // Fixed-size array: decode element by element.
            QList<QVariant> result;
            for (quint32 i = 0; i < field.size; i += field.elementsize) {
                result.append(readTraceItem(data, field.offset + i, field.elementsize,
                                            field.flags & FIELD_IS_SIGNED, byteSwap));
            }
            return result;
        }
    } else {
        // Plain scalar field.
        return readTraceItem(data, field.offset, field.size, field.flags & FIELD_IS_SIGNED,
                             byteSwap);
    }
}
581 
// Fully process one sample: resolve its callchain, DWARF-unwind the user
// stack, and emit a Sample (or TracePointSample) record. Symbol lookups can
// invalidate the per-process caches; when that happens the caches are
// cleared and the whole resolution is retried once.
void PerfUnwind::analyze(const PerfRecordSample &sample)
{
    if (m_stats.enabled) // don't do any time intensive work in stats mode
        return;

    PerfSymbolTable *kernelSymbols = symbolTable(s_kernelPid);
    PerfSymbolTable *userSymbols = symbolTable(sample.pid());

    for (int unwindingAttempt = 0; unwindingAttempt < 2; ++unwindingAttempt) {
        m_currentUnwind.isInterworking = false;
        m_currentUnwind.firstGuessedFrame = -1;
        m_currentUnwind.sample = &sample;
        m_currentUnwind.frames.clear();

        userSymbols->updatePerfMap();
        if (!sample.callchain().isEmpty() || !sample.branchStack().isEmpty())
            resolveCallchain();

        bool userDirty = userSymbols->cacheIsDirty();
        bool kernelDirty = kernelSymbols->cacheIsDirty();

        // only try to unwind when resolveCallchain did not dirty the cache
        if (!userDirty && !kernelDirty) {
            if (sample.registerAbi() != 0 && sample.userStack().length() > 0) {
                unwindStack();
                userDirty = userSymbols->cacheIsDirty();
            } else {
                break;
            }
        }

        // when the cache is dirty, we clean it up and try again, otherwise we can
        // stop as unwinding should have succeeded
        if (userDirty)
            userSymbols->clearCache(); // fail, try again
        if (kernelDirty)
            kernelSymbols->clearCache();
        if (!userDirty && !kernelDirty)
            break; // success
    }

    // If nothing was found, at least look up the IP
    if (m_currentUnwind.frames.isEmpty()) {
        const bool isKernel = ipIsInKernelSpace(sample.ip());
        PerfSymbolTable *ipSymbols = isKernel ? kernelSymbols : userSymbols;
        m_currentUnwind.frames.append(ipSymbols->lookupFrame(sample.ip(), isKernel,
                                                             &m_currentUnwind.isInterworking));
    }


    quint8 numGuessedFrames = 0;
    if (m_currentUnwind.firstGuessedFrame != -1) {
        // Squeeze it into 8 bits.
        int numGuessed = m_currentUnwind.frames.length() - m_currentUnwind.firstGuessedFrame;
        Q_ASSERT(numGuessed >= 0);
        numGuessedFrames
                = static_cast<quint8>(qMin(static_cast<int>(std::numeric_limits<quint8>::max()),
                                           numGuessed));
    }

    // A trace point sample is marked as such and carries its event format ID
    // (taken from the attribute's config) so the raw data can be decoded.
    EventType type = Sample;
    qint32 eventFormatId = -1;
    const qint32 attributesId = m_attributeIds.value(sample.id(), -1);
    if (attributesId != -1) {
        const auto &attribute = m_attributes.at(attributesId);
        if (attribute.type() == PerfEventAttributes::TYPE_TRACEPOINT) {
            type = TracePointSample;
            if (attribute.config() > quint64(std::numeric_limits<qint32>::max()))
                qWarning() << "Excessively large event format ID" << attribute.config();
            else
                eventFormatId = static_cast<qint32>(attribute.config());
        }
    }

    // Values per attribute: either the plain period, or one entry per read format.
    QVector<QPair<qint32, quint64>> values;
    if (sample.readFormats().isEmpty()) {
        values.push_back({ attributesId, sample.period() });
    } else {
        for (const auto& f : sample.readFormats()) {
            values.push_back({ m_attributeIds.value(f.id, -1), f.value });
        }
    }

    QByteArray buffer;
    QDataStream stream(&buffer, QIODevice::WriteOnly);
    stream << static_cast<quint8>(type) << sample.pid()
           << sample.tid() << sample.time() << sample.cpu() << m_currentUnwind.frames
           << numGuessedFrames << values;

    if (type == TracePointSample) {
        // Decode the raw trace payload field by field, keyed by the interned
        // field name.
        QHash<qint32, QVariant> traceData;
        const QByteArray &data = sample.rawData();
        const EventFormat &format = m_tracingData.eventFormat(eventFormatId);
        for (const FormatField &field : format.fields) {
            traceData[lookupString(field.name)]
                    = readTraceData(data, field, m_byteOrder != QSysInfo::ByteOrder);
        }
        stream << traceData;
    }

    sendBuffer(buffer);
}
684 
// Handle a PERF_RECORD_FORK event: buffer a ThreadStart task event for the
// child, carrying the parent pid as payload.
void PerfUnwind::fork(const PerfRecordFork &sample)
{
    bufferEvent(TaskEvent{sample.childPid(), sample.childTid(), sample.time(), sample.cpu(),
                          ThreadStart, sample.parentPid()},
                &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
}
691 
// Handle a PERF_RECORD_EXIT event: buffer a ThreadEnd task event for the
// exiting thread (no payload).
void PerfUnwind::exit(const PerfRecordExit &sample)
{
    bufferEvent(TaskEvent{sample.childPid(), sample.childTid(), sample.time(), sample.cpu(),
                          ThreadEnd, {}},
                &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
}
698 
sendString(qint32 id,const QByteArray & string)699 void PerfUnwind::sendString(qint32 id, const QByteArray& string)
700 {
701     QByteArray buffer;
702     QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(StringDefinition)
703                                                << id << string;
704     sendBuffer(buffer);
705 }
706 
sendLocation(qint32 id,const PerfUnwind::Location & location)707 void PerfUnwind::sendLocation(qint32 id, const PerfUnwind::Location &location)
708 {
709     QByteArray buffer;
710     Q_ASSERT(location.pid);
711     QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(LocationDefinition)
712                                                << id << location;
713     sendBuffer(buffer);
714 }
715 
sendSymbol(qint32 id,const PerfUnwind::Symbol & symbol)716 void PerfUnwind::sendSymbol(qint32 id, const PerfUnwind::Symbol &symbol)
717 {
718     QByteArray buffer;
719     QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(SymbolDefinition)
720                                                << id << symbol;
721     sendBuffer(buffer);
722 }
723 
sendError(ErrorCode error,const QString & message)724 void PerfUnwind::sendError(ErrorCode error, const QString &message)
725 {
726     qWarning().noquote().nospace() << error << ": " << message;
727     QByteArray buffer;
728     QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(Error)
729                                                << static_cast<qint32>(error) << message;
730     sendBuffer(buffer);
731 }
732 
sendProgress(float percent)733 void PerfUnwind::sendProgress(float percent)
734 {
735     QByteArray buffer;
736     QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(Progress)
737                                                << percent;
738     sendBuffer(buffer);
739 }
740 
resolveString(const QByteArray & string)741 qint32 PerfUnwind::resolveString(const QByteArray& string)
742 {
743     if (string.isEmpty())
744         return -1;
745     auto stringIt = m_strings.find(string);
746     if (stringIt == m_strings.end()) {
747         stringIt = m_strings.insert(string, m_strings.size());
748         sendString(stringIt.value(), string);
749     }
750     return stringIt.value();
751 }
752 
lookupString(const QByteArray & string)753 qint32 PerfUnwind::lookupString(const QByteArray &string)
754 {
755     return m_strings.value(string, -1);
756 }
757 
lookupLocation(const PerfUnwind::Location & location) const758 int PerfUnwind::lookupLocation(const PerfUnwind::Location &location) const
759 {
760     return m_locations.value(location, -1);
761 }
762 
resolveLocation(const Location & location)763 int PerfUnwind::resolveLocation(const Location &location)
764 {
765     auto symbolLocationIt = m_locations.find(location);
766     if (symbolLocationIt == m_locations.end()) {
767         symbolLocationIt = m_locations.insert(location, m_locations.size());
768         sendLocation(symbolLocationIt.value(), location);
769     }
770     return symbolLocationIt.value();
771 }
772 
hasSymbol(int locationId) const773 bool PerfUnwind::hasSymbol(int locationId) const
774 {
775     return m_symbols.contains(locationId);
776 }
777 
resolveSymbol(int locationId,const PerfUnwind::Symbol & symbol)778 void PerfUnwind::resolveSymbol(int locationId, const PerfUnwind::Symbol &symbol)
779 {
780     m_symbols.insert(locationId, symbol);
781     sendSymbol(locationId, symbol);
782 }
783 
findKallsymEntry(quint64 address)784 PerfKallsymEntry PerfUnwind::findKallsymEntry(quint64 address)
785 {
786     if (m_kallsyms.isEmpty() && m_kallsyms.errorString().isEmpty()) {
787         auto path = m_kallsymsPath;
788         if (!m_ignoreKallsymsBuildId) {
789             const auto &buildId = m_buildIds.value(QByteArrayLiteral("[kernel.kallsyms]"));
790             if (!buildId.isEmpty()) {
791                 const auto debugPaths = m_debugPath.split(QDir::listSeparator(),
792                                                           Qt::SkipEmptyParts);
793                 for (const auto &debugPath : debugPaths) {
794                     const QString buildIdPath = debugPath + QDir::separator() +
795                                                 QLatin1String("[kernel.kallsyms]") +
796                                                 QDir::separator() +
797                                                 QString::fromUtf8(buildId.toHex()) +
798                                                 QDir::separator() + QLatin1String("kallsyms");
799                     if (QFile::exists(buildIdPath)) {
800                         path = buildIdPath;
801                         break;
802                     }
803                 }
804             }
805         }
806         if (!m_kallsyms.parseMapping(path)) {
807             sendError(InvalidKallsyms,
808                       tr("Failed to parse kernel symbol mapping file \"%1\": %2")
809                             .arg(path, m_kallsyms.errorString()));
810         }
811     }
812     return m_kallsyms.findEntry(address);
813 }
814 
finishedRound()815 void PerfUnwind::finishedRound()
816 {
817     if (m_stats.enabled)
818         m_stats.finishedRound();
819 
820     // when we parse a perf data stream we may not know whether it contains
821     // FINISHED_ROUND events. now we know, and thus we set the m_maxEventBufferSize
822     // to 0 to disable the heuristic there. Instead, we will now rely on the finished
823     // round events to tell us when to flush the event buffer
824     if (!m_targetEventBufferSize) {
825         // we only flush half of the events we got in this round
826         // this work-arounds bugs in upstream perf which leads to time order violations
827         // across FINISHED_ROUND events which should in theory never happen
828         flushEventBuffer(m_eventBufferSize / 2);
829     } else if (m_timeOrderViolations == 0 && !hasTracePointAttributes()) {
830         qDebug() << "FINISHED_ROUND detected. Switching to automatic buffering";
831         setTargetEventBufferSize(0);
832     }
833 }
834 
835 template<typename Event>
bufferEvent(const Event & event,QList<Event> * buffer,uint * eventCounter)836 void PerfUnwind::bufferEvent(const Event &event, QList<Event> *buffer, uint *eventCounter)
837 {
838     buffer->append(event);
839     m_eventBufferSize += event.size();
840 
841     if (m_stats.enabled) {
842         *eventCounter += 1;
843         m_stats.maxBufferSize = std::max(m_eventBufferSize, m_stats.maxBufferSize);
844         m_stats.totalEventSizePerRound += event.size();
845         m_stats.addEventTime(event.time());
846         // don't return early, stats should include our buffer behavior
847     }
848 
849     if (m_targetEventBufferSize && m_eventBufferSize > m_targetEventBufferSize)
850         flushEventBuffer(m_targetEventBufferSize / 2);
851 }
852 
forwardMmapBuffer(QList<PerfRecordMmap>::Iterator & mmapIt,const QList<PerfRecordMmap>::Iterator & mmapEnd,quint64 timestamp)853 void PerfUnwind::forwardMmapBuffer(QList<PerfRecordMmap>::Iterator &mmapIt,
854                                    const QList<PerfRecordMmap>::Iterator &mmapEnd,
855                                    quint64 timestamp)
856 {
857     for (; mmapIt != mmapEnd && mmapIt->time() <= timestamp; ++mmapIt) {
858         if (!m_stats.enabled) {
859             const auto &buildId = m_buildIds.value(mmapIt->filename());
860             symbolTable(mmapIt->pid())->registerElf(*mmapIt, buildId);
861         }
862         m_eventBufferSize -= mmapIt->size();
863     }
864 }
865 
// Strict weak ordering on the event timestamp, for sorting event buffers.
template<typename T>
bool sortByTime(const T &first, const T &second)
{
    const auto firstTime = first.time();
    const auto secondTime = second.time();
    return firstTime < secondTime;
}
871 
// Flushes buffered samples, mmap records and task events in timestamp order
// until the buffered byte size drops to desiredBufferSize. Detects time order
// violations across flushes and reacts by growing the target buffer size.
void PerfUnwind::flushEventBuffer(uint desiredBufferSize)
{
    // stable sort here to keep order of events with the same time
    // esp. when we runtime-attach, we will get lots of mmap events with time 0
    // which we must not shuffle
    std::stable_sort(m_mmapBuffer.begin(), m_mmapBuffer.end(), sortByTime<PerfRecord>);
    std::stable_sort(m_sampleBuffer.begin(), m_sampleBuffer.end(), sortByTime<PerfRecord>);
    std::stable_sort(m_taskEventsBuffer.begin(), m_taskEventsBuffer.end(), sortByTime<TaskEvent>);

    if (m_stats.enabled) {
        // Buffers are now sorted, so only the leading run of events can be
        // older than the previous flush's max time; stop at the first that isn't.
        for (const auto &sample : m_sampleBuffer) {
            if (sample.time() < m_lastFlushMaxTime)
                ++m_stats.numTimeViolatingSamples;
            else
                break;
        }
        for (const auto &mmap : m_mmapBuffer) {
            if (mmap.time() < m_lastFlushMaxTime)
                ++m_stats.numTimeViolatingMmaps;
            else
                break;
        }
    }

    bool violatesTimeOrder = false;
    if (!m_mmapBuffer.isEmpty() && m_mmapBuffer.first().time() < m_lastFlushMaxTime) {
        // when an mmap event is not following our desired time order, it can
        // severly break our analysis. as such we report a real error in these cases
        sendError(TimeOrderViolation,
                  tr("Time order violation of MMAP event across buffer flush detected. "
                     "Event time is %1, max time during last buffer flush was %2. "
                     "This potentially breaks the data analysis.")
                    .arg(m_mmapBuffer.first().time()).arg(m_lastFlushMaxTime));
        violatesTimeOrder = true;
    }

    auto mmapIt = m_mmapBuffer.begin();
    auto mmapEnd = m_mmapBuffer.end();

    auto sampleIt = m_sampleBuffer.begin();
    auto sampleEnd = m_sampleBuffer.end();

    // Remember the buffer size before flushing; used below when growing the
    // target buffer size after a violation.
    uint bufferSize = m_eventBufferSize;

    auto taskEventIt = m_taskEventsBuffer.begin();
    auto taskEventEnd = m_taskEventsBuffer.end();

    // Main drain loop: process one sample per iteration, interleaving any
    // task events and mmap records that precede it in time.
    for (; m_eventBufferSize > desiredBufferSize && sampleIt != sampleEnd; ++sampleIt) {
        const quint64 timestamp = sampleIt->time();

        if (timestamp < m_lastFlushMaxTime) {
            if (!violatesTimeOrder) {
                qWarning() << "Time order violation across buffer flush detected:"
                           << "Event time =" << timestamp << ","
                           << "max time during last buffer flush = " << m_lastFlushMaxTime;
                // we don't send an error for samples with broken times, since these
                // are usually harmless and actually occur relatively often
                // if desired, one can detect these issues on the client side anyways,
                // based on the sample times
                violatesTimeOrder = true;
            }
        } else {
            // We've forwarded past the violating events as we couldn't do anything about those
            // anymore. Now break and wait for the larger buffer to fill up, so that we avoid
            // further violations in the yet to be processed events.
            if (violatesTimeOrder) {
                // Process any remaining mmap events violating the previous buffer flush.
                // Otherwise we would catch the same ones again in the next round.
                forwardMmapBuffer(mmapIt, mmapEnd, m_lastFlushMaxTime);
                break;
            }

            // Track the newest timestamp flushed so far; the next flush uses
            // it to detect violations.
            m_lastFlushMaxTime = timestamp;
        }

        // Forward all task events up to (and including) this sample's time.
        for (; taskEventIt != taskEventEnd && taskEventIt->time() <= sampleIt->time();
             ++taskEventIt) {
            if (!m_stats.enabled) {
                // flush the mmap buffer on fork events to allow initialization with the correct state
                if (taskEventIt->m_type == ThreadStart && taskEventIt->m_pid != taskEventIt->m_payload) {
                    forwardMmapBuffer(mmapIt, mmapEnd, taskEventIt->time());
                    const auto childPid = taskEventIt->m_pid;
                    const auto parentPid = taskEventIt->m_payload.value<qint32>();
                    symbolTable(childPid)->initAfterFork(symbolTable(parentPid));
                } else if (taskEventIt->m_type == ThreadEnd && taskEventIt->m_pid == taskEventIt->m_tid) {
                    // The whole process (not just a thread) ended: drop its symbol table.
                    delete m_symbolTables.take(taskEventIt->m_pid);
                }

                sendTaskEvent(*taskEventIt);
            }
            m_eventBufferSize -= taskEventIt->size();
        }

        // Register all mmaps up to this sample's time, then analyze the sample.
        forwardMmapBuffer(mmapIt, mmapEnd, timestamp);

        analyze(*sampleIt);
        m_eventBufferSize -= sampleIt->size();
    }

    // also flush task events after samples got depleted
    // this ensures we send all of them, even for situations where the client
    // application is not CPU-heavy but rather sleeps most of the time
    for (; m_eventBufferSize > desiredBufferSize && taskEventIt != taskEventEnd; ++taskEventIt) {
        if (!m_stats.enabled) {
            sendTaskEvent(*taskEventIt);
        }
        m_eventBufferSize -= taskEventIt->size();
    }

    if (m_stats.enabled) {
        // Record per-flush maxima for the statistics report.
        ++m_stats.numBufferFlushes;
        const auto samples = std::distance(m_sampleBuffer.begin(), sampleIt);
        Q_ASSERT(samples >= 0 && samples < std::numeric_limits<uint>::max());
        m_stats.maxSamplesPerFlush = std::max(static_cast<uint>(samples),
                                              m_stats.maxSamplesPerFlush);
        const auto mmaps = std::distance(m_mmapBuffer.begin(), mmapIt);
        Q_ASSERT(mmaps >= 0 && mmaps < std::numeric_limits<uint>::max());
        m_stats.maxMmapsPerFlush = std::max(static_cast<uint>(mmaps),
                                            m_stats.maxMmapsPerFlush);
        const auto taskEvents = std::distance(m_taskEventsBuffer.begin(), taskEventIt);
        Q_ASSERT(taskEvents >= 0 && taskEvents < std::numeric_limits<uint>::max());
        m_stats.maxTaskEventsPerFlush = std::max(static_cast<uint>(taskEvents),
                                                      m_stats.maxTaskEventsPerFlush);
    }

    // Erase everything that was flushed above.
    m_sampleBuffer.erase(m_sampleBuffer.begin(), sampleIt);
    m_mmapBuffer.erase(m_mmapBuffer.begin(), mmapIt);
    m_taskEventsBuffer.erase(m_taskEventsBuffer.begin(), taskEventIt);

    if (!violatesTimeOrder)
        return;

    // Increase buffer size to reduce future time order violations
    ++m_timeOrderViolations;

    // If we had a larger event buffer before, increase.
    if (bufferSize < m_lastEventBufferSize)
        bufferSize = m_lastEventBufferSize;

    // Double the size, clamping by UINT_MAX.
    if (bufferSize > std::numeric_limits<uint>::max() / 2)
        bufferSize = std::numeric_limits<uint>::max();
    else
        bufferSize *= 2;

    // Clamp by max buffer size.
    if (bufferSize > m_maxEventBufferSize)
        bufferSize = m_maxEventBufferSize;

    qDebug() << "Increasing buffer size to" << bufferSize;
    setTargetEventBufferSize(bufferSize);
}
1024 
contextSwitch(const PerfRecordContextSwitch & contextSwitch)1025 void PerfUnwind::contextSwitch(const PerfRecordContextSwitch& contextSwitch)
1026 {
1027     bufferEvent(TaskEvent{contextSwitch.pid(), contextSwitch.tid(),
1028                 contextSwitch.time(), contextSwitch.cpu(),
1029                 ContextSwitchDefinition,
1030                 static_cast<bool>(contextSwitch.misc() & PERF_RECORD_MISC_SWITCH_OUT)},
1031                 &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
1032 }
1033 
sendTaskEvent(const TaskEvent & taskEvent)1034 void PerfUnwind::sendTaskEvent(const TaskEvent& taskEvent)
1035 {
1036     QByteArray buffer;
1037     QDataStream stream(&buffer, QIODevice::WriteOnly);
1038     stream << static_cast<quint8>(taskEvent.m_type)
1039            << taskEvent.m_pid << taskEvent.m_tid
1040            << taskEvent.m_time << taskEvent.m_cpu;
1041 
1042     if (taskEvent.m_type == ContextSwitchDefinition)
1043         stream << taskEvent.m_payload.value<bool>();
1044     else if (taskEvent.m_type == Command || taskEvent.m_type == ThreadStart)
1045         stream << taskEvent.m_payload.value<qint32>();
1046     else if (taskEvent.m_type == LostDefinition)
1047         stream << taskEvent.m_payload.value<quint64>();
1048 
1049     sendBuffer(buffer);
1050 }
1051