/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd
** All rights reserved.
** For any questions to The Qt Company, please use contact form at http://www.qt.io/contact-us
**
** This file is part of the Qt Enterprise Perf Profiler Add-on.
**
** GNU General Public License Usage
** This file may be used under the terms of the GNU General Public License
** version 3 as published by the Free Software Foundation and appearing in
** the file LICENSE.GPLv3 included in the packaging of this file. Please
** review the following information to ensure the GNU General Public License
** requirements will be met: https://www.gnu.org/licenses/gpl.html.
**
** If you have questions regarding the use of this file, please use
** contact form at http://www.qt.io/contact-us
**
****************************************************************************/
20 
#pragma once

#include "perfdata.h"
#include "perfkallsyms.h"
#include "perfregisterinfo.h"
#include "perftracingdata.h"
#include "perfaddresscache.h"

#include <libdwfl.h>

#include <QByteArray>
#include <QDir>
#include <QHash>
#include <QIODevice>
#include <QList>
#include <QMap>
#include <QObject>
#include <QString>
#include <QSysInfo>
#include <QVariant>
#include <QVector>

#include <limits>
42 
43 class PerfSymbolTable;
44 class PerfUnwind : public QObject
45 {
46     Q_OBJECT
47 public:
48     enum EventType {
49         ThreadStart,
50         ThreadEnd,
51         Command,
52         LocationDefinition,
53         SymbolDefinition,
54         StringDefinition,
55         LostDefinition,
56         FeaturesDefinition,
57         Error,
58         Progress,
59         TracePointFormat,
60         AttributesDefinition,
61         ContextSwitchDefinition,
62         Sample,
63         TracePointSample,
64         InvalidType
65     };
66 
67     struct Location {
68         explicit Location(quint64 address = 0, quint64 relAddr = 0, qint32 file = -1,
69                           quint32 pid = 0, qint32 line = 0, qint32 column = 0,
70                           qint32 parentLocationId = -1) :
addressLocation71             address(address), relAddr(relAddr), file(file), pid(pid), line(line), column(column),
72             parentLocationId(parentLocationId) {}
73 
74         quint64 address;
75         quint64 relAddr;
76         qint32 file;
77         quint32 pid;
78         qint32 line;
79         qint32 column;
80         qint32 parentLocationId;
81     };
82 
83     struct Symbol {
84         explicit Symbol(qint32 name = -1, quint64 relAddr = 0, quint64 size = 0, qint32 binary = -1, qint32 path = -1, qint32 actualPath = -1,
85                         bool isKernel = false) :
nameSymbol86             name(name), relAddr(relAddr), size(size), binary(binary), path(path), actualPath(actualPath), isKernel(isKernel)
87         {}
88 
89         qint32 name;
90         quint64 relAddr;
91         quint64 size;
92         qint32 binary;
93         qint32 path;
94         qint32 actualPath;
95         bool isKernel;
96     };
97 
98     struct UnwindInfo {
UnwindInfoUnwindInfo99         UnwindInfo() : frames(0), unwind(nullptr), sample(nullptr), maxFrames(64),
100             firstGuessedFrame(-1), isInterworking(false) {}
101 
102         QHash<qint32, QHash<quint64, Dwarf_Word>> stackValues;
103         QVector<qint32> frames;
104         PerfUnwind *unwind;
105         const PerfRecordSample *sample;
106         int maxFrames;
107         int firstGuessedFrame;
108         bool isInterworking;
109     };
110 
111     struct Stats
112     {
StatsStats113         Stats()
114             : numSamples(0), numMmaps(0), numRounds(0), numBufferFlushes(0),
115             numTimeViolatingSamples(0), numTimeViolatingMmaps(0),
116             numSamplesInRound(0), numMmapsInRound(0), numTaskEventsInRound(0),
117             maxSamplesPerRound(0), maxMmapsPerRound(0), maxTaskEventsPerRound(0),
118             maxSamplesPerFlush(0), maxMmapsPerFlush(0), maxTaskEventsPerFlush(0),
119             maxBufferSize(0), maxTotalEventSizePerRound(0),
120             maxTime(0), maxTimeBetweenRounds(0), maxReorderTime(0),
121             lastRoundTime(0), totalEventSizePerRound(0),
122             enabled(false)
123         {}
124 
125         void addEventTime(quint64 time);
126         void finishedRound();
127 
128         quint64 numSamples;
129         quint64 numMmaps;
130         quint64 numRounds;
131         quint64 numBufferFlushes;
132         quint64 numTimeViolatingSamples;
133         quint64 numTimeViolatingMmaps;
134         uint numSamplesInRound;
135         uint numMmapsInRound;
136         uint numTaskEventsInRound;
137         uint maxSamplesPerRound;
138         uint maxMmapsPerRound;
139         uint maxTaskEventsPerRound;
140         uint maxSamplesPerFlush;
141         uint maxMmapsPerFlush;
142         uint maxTaskEventsPerFlush;
143         uint maxBufferSize;
144         uint maxTotalEventSizePerRound;
145         quint64 maxTime;
146         quint64 maxTimeBetweenRounds;
147         quint64 maxReorderTime;
148         quint64 lastRoundTime;
149         uint totalEventSizePerRound;
150         bool enabled;
151     };
152 
153     static const qint32 s_kernelPid;
154     static QString defaultDebugInfoPath();
155     static QString defaultKallsymsPath();
156 
157     PerfUnwind(QIODevice *output, const QString &systemRoot = QDir::rootPath(),
158                const QString &debugPath = defaultDebugInfoPath(),
159                const QString &extraLibs = QString(), const QString &appPath = QString(),
160                bool printStats = false);
161     ~PerfUnwind();
162 
kallsymsPath()163     QString kallsymsPath() const { return m_kallsymsPath; }
setKallsymsPath(const QString & kallsymsPath)164     void setKallsymsPath(const QString &kallsymsPath) { m_kallsymsPath = kallsymsPath; }
165 
ignoreKallsymsBuildId()166     bool ignoreKallsymsBuildId() const { return m_ignoreKallsymsBuildId; }
setIgnoreKallsymsBuildId(bool ignore)167     void setIgnoreKallsymsBuildId(bool ignore) { m_ignoreKallsymsBuildId = ignore; }
168 
maxEventBufferSize()169     uint maxEventBufferSize() const { return m_maxEventBufferSize; }
170     void setMaxEventBufferSize(uint size);
171 
targetEventBufferSize()172     uint targetEventBufferSize() const { return m_targetEventBufferSize; }
173     void setTargetEventBufferSize(uint size);
174 
maxUnwindFrames()175     int maxUnwindFrames() const { return m_currentUnwind.maxFrames; }
setMaxUnwindFrames(int maxUnwindFrames)176     void setMaxUnwindFrames(int maxUnwindFrames) { m_currentUnwind.maxFrames = maxUnwindFrames; }
177 
architecture()178     PerfRegisterInfo::Architecture architecture() const { return m_architecture; }
setArchitecture(PerfRegisterInfo::Architecture architecture)179     void setArchitecture(PerfRegisterInfo::Architecture architecture)
180     {
181         m_architecture = architecture;
182     }
183 
setByteOrder(QSysInfo::Endian byteOrder)184     void setByteOrder(QSysInfo::Endian byteOrder) { m_byteOrder = byteOrder; }
byteOrder()185     QSysInfo::Endian byteOrder() const { return m_byteOrder; }
186 
187     void registerElf(const PerfRecordMmap &mmap);
188     void comm(const PerfRecordComm &comm);
189     void attr(const PerfRecordAttr &attr);
190     void lost(const PerfRecordLost &lost);
191     void features(const PerfFeatures &features);
192     void tracing(const PerfTracingData &tracingData);
193     void finishedRound();
194     void contextSwitch(const PerfRecordContextSwitch &contextSwitch);
195 
196     bool ipIsInKernelSpace(quint64 ip) const;
197     void sample(const PerfRecordSample &sample);
198 
199     void fork(const PerfRecordFork &sample);
200     void exit(const PerfRecordExit &sample);
201     PerfSymbolTable *symbolTable(qint32 pid);
202 
203     qint32 resolveString(const QByteArray &string);
204     qint32 lookupString(const QByteArray &string);
205 
206     void addAttributes(const PerfEventAttributes &attributes, const QByteArray &name,
207                        const QList<quint64> &ids);
208 
209     int lookupLocation(const Location &location) const;
210     int resolveLocation(const Location &location);
211 
212     bool hasSymbol(int locationId) const;
213     void resolveSymbol(int locationId, const Symbol &symbol);
214 
215     PerfKallsymEntry findKallsymEntry(quint64 address);
addressCache()216     PerfAddressCache *addressCache() { return &m_addressCache; }
217 
218     enum ErrorCode {
219         TimeOrderViolation = 1,
220         MissingElfFile = 2,
221         InvalidKallsyms = 3,
222     };
223     Q_ENUM(ErrorCode)
224     void sendError(ErrorCode error, const QString &message);
225     void sendProgress(float percent);
226 
systemRoot()227     QString systemRoot() const { return m_systemRoot; }
extraLibsPath()228     QString extraLibsPath() const { return m_extraLibsPath; }
appPath()229     QString appPath() const { return m_appPath; }
debugPath()230     QString debugPath() const { return m_debugPath; }
stats()231     Stats stats() const { return m_stats; }
232 
finalize()233     void finalize()
234     {
235         finishedRound();
236         flushEventBuffer(0);
237     }
238 
239 private:
240 
241     enum CallchainContext {
242         PERF_CONTEXT_HV             = static_cast<quint64>(-32),
243         PERF_CONTEXT_KERNEL         = static_cast<quint64>(-128),
244         PERF_CONTEXT_USER           = static_cast<quint64>(-512),
245 
246         PERF_CONTEXT_GUEST          = static_cast<quint64>(-2048),
247         PERF_CONTEXT_GUEST_KERNEL   = static_cast<quint64>(-2176),
248         PERF_CONTEXT_GUEST_USER     = static_cast<quint64>(-2560),
249 
250         PERF_CONTEXT_MAX            = static_cast<quint64>(-4095),
251     };
252 
253     UnwindInfo m_currentUnwind;
254     QIODevice *m_output;
255 
256     Dwfl_Callbacks m_offlineCallbacks;
257     char *m_debugInfoPath;
258 
259     PerfRegisterInfo::Architecture m_architecture;
260 
261 
262     // Root of the file system of the machine that recorded the data. Any binaries and debug
263     // symbols not found in appPath or extraLibsPath have to appear here.
264     QString m_systemRoot;
265 
266     // Extra path to search for binaries and debug symbols before considering the system root
267     QString m_extraLibsPath;
268 
269     // Path where the application being profiled resides. This is the first path to look for
270     // binaries and debug symbols.
271     QString m_appPath;
272 
273     // Path to debug information, e.g. ~/.debug and /usr/local/debug
274     QString m_debugPath;
275 
276     // Path to kallsyms path
277     QString m_kallsymsPath;
278     bool m_ignoreKallsymsBuildId;
279 
280     QList<PerfRecordSample> m_sampleBuffer;
281     QList<PerfRecordMmap> m_mmapBuffer;
282     struct TaskEvent
283     {
284         qint32 m_pid;
285         qint32 m_tid;
286         quint64 m_time;
287         quint32 m_cpu;
288         EventType m_type;
289         QVariant m_payload;
290 
timeTaskEvent291         quint64 time() const { return m_time; }
sizeTaskEvent292         quint64 size() const { return sizeof(TaskEvent); }
293     };
294     QList<TaskEvent> m_taskEventsBuffer;
295     QHash<qint32, PerfSymbolTable *> m_symbolTables;
296     PerfKallsyms m_kallsyms;
297     PerfAddressCache m_addressCache;
298     PerfTracingData m_tracingData;
299 
300     QHash<QByteArray, qint32> m_strings;
301     QHash<Location, qint32> m_locations;
302     QHash<qint32, Symbol> m_symbols;
303     QHash<quint64, qint32> m_attributeIds;
304     QVector<PerfEventAttributes> m_attributes;
305     QHash<QByteArray, QByteArray> m_buildIds;
306 
307     uint m_lastEventBufferSize;
308     uint m_maxEventBufferSize;
309     uint m_targetEventBufferSize;
310     uint m_eventBufferSize;
311 
312     uint m_timeOrderViolations;
313 
314     quint64 m_lastFlushMaxTime;
315     QSysInfo::Endian m_byteOrder = QSysInfo::LittleEndian;
316 
317     Stats m_stats;
318 
319     void unwindStack();
320     void resolveCallchain();
321     void analyze(const PerfRecordSample &sample);
322     void sendBuffer(const QByteArray &buffer);
323     void sendString(qint32 id, const QByteArray &string);
324     void sendLocation(qint32 id, const Location &location);
325     void sendSymbol(qint32 id, const Symbol &symbol);
326     void sendAttributes(qint32 id, const PerfEventAttributes &attributes, const QByteArray &name);
327     void sendEventFormat(qint32 id, const EventFormat &format);
328     void sendTaskEvent(const TaskEvent &taskEvent);
329 
330     template<typename Event>
331     void bufferEvent(const Event &event, QList<Event> *buffer, uint *eventCounter);
332     void flushEventBuffer(uint desiredBufferSize);
333 
334     QVariant readTraceData(const QByteArray &data, const FormatField &field, bool byteSwap);
335     void forwardMmapBuffer(QList<PerfRecordMmap>::Iterator &it,
336                            const QList<PerfRecordMmap>::Iterator &mmapEnd,
337                            quint64 timestamp);
338     void revertTargetEventBufferSize();
339     bool hasTracePointAttributes() const;
340 };
341 
342 uint qHash(const PerfUnwind::Location &location, uint seed = 0);
343 bool operator==(const PerfUnwind::Location &a, const PerfUnwind::Location &b);
344