1 /*****
2 * profiler.h
3 * Andy Hammerlindl 2010/07/24
4 *
5 * Profiler for the execution of the virtual machine bytecode.
6 *****/
7
8 #ifndef PROFILER_H
9 #define PROFILER_H
10
11 #include <sys/time.h>
12
13 #include <iostream>
14
15 #include "inst.h"
16
17 namespace vm {
18
19 #ifdef DEBUG_BLTIN
20 string lookupBltin(bltin b);
21 #endif
22
positionFromLambda(lambda * func)23 inline position positionFromLambda(lambda *func) {
24 if (func == 0)
25 return position();
26
27 program& code = *func->code;
28
29 // Check for empty program.
30 if (code.begin() == code.end())
31 return position();
32
33 return code.begin()->pos;
34 }
35
printNameFromLambda(ostream & out,lambda * func)36 inline void printNameFromLambda(ostream& out, lambda *func) {
37 if (!func) {
38 out << "<top level>";
39 return;
40 }
41
42 #ifdef DEBUG_FRAME
43 string name = func->name;
44 #else
45 string name = "";
46 #endif
47
48 // If unnamed, use the pointer address.
49 if (name.empty())
50 out << func;
51 else
52 out << name;
53
54 out << " at ";
55 positionFromLambda(func).printTerse(out);
56 }
57
printNameFromBltin(ostream & out,bltin b)58 inline void printNameFromBltin(ostream& out, bltin b) {
59 #ifdef DEBUG_BLTIN
60 string name = lookupBltin(b);
61 #else
62 string name = "";
63 #endif
64
65 if (!name.empty())
66 out << name << " ";
67 out << "(builtin at " << (void *)b << ")";
68 }
69
70 class profiler : public gc {
71 // To do call graph analysis, each call stack that occurs in practice is
72 // represented by a node. For instance, if f and g are functions, then
73 // f -> g -> g
74 // is represented by a node and
75 // g -> f -> g
76 // is represented by a different one.
77 struct node {
78 // The top-level function of the call stack. It is either an asymptote
79 // function with a given lambda, or a builtin function, with a given
80 // bltin.
81 lambda *func;
82 bltin cfunc;
83
84 // The number of times the top-level function has been called resulting in
85 // this specific call stack.
86 int calls;
87
88 // The number of bytecode instructions executed with this exact call stack.
89 // It does not include time spent in called function.
90 int instructions;
91
92 // Number of instructions spent in this function or its children. This is
93 // computed by computeTotals.
94 int instTotal;
95
96 // The number of real-time nanoseconds spent in this node. WARNING: May
97 // be wildly inaccurate.
98 long long nsecs;
99
100 // Total including children.
101 long long nsecsTotal;
102
103 // Call stacks resulting from calls during this call stack.
104 mem::vector<node> children;
105
nodenode106 node()
107 : func(0), cfunc(0), calls(0),
108 instructions(0), instTotal(0),
109 nsecs(0), nsecsTotal(0) {}
110
nodenode111 node(lambda *func)
112 : func(func), cfunc(0), calls(0),
113 instructions(0), instTotal(0),
114 nsecs(0), nsecsTotal(0) {}
115
nodenode116 node(bltin b)
117 : func(0), cfunc(b), calls(0),
118 instructions(0), instTotal(0),
119 nsecs(0), nsecsTotal(0) {}
120
121 // Return the call stack resulting from a call to func when this call
122 // stack is current.
getChildnode123 node *getChild(lambda *func) {
124 size_t n = children.size();
125 for (size_t i = 0; i < n; ++i)
126 if (children[i].func == func)
127 return &children[i];
128
129 // Not found, create a new one.
130 children.push_back(node(func));
131 return &children.back();
132 }
getChildnode133 node *getChild(bltin func) {
134 size_t n = children.size();
135 for (size_t i = 0; i < n; ++i)
136 if (children[i].cfunc == func)
137 return &children[i];
138
139 // Not found, create a new one.
140 children.push_back(node(func));
141 return &children.back();
142 }
143
computeTotalsnode144 void computeTotals() {
145 instTotal = instructions;
146 nsecsTotal = nsecs;
147 size_t n = children.size();
148 for (size_t i = 0; i < n; ++i) {
149 children[i].computeTotals();
150 instTotal += children[i].instTotal;
151 nsecsTotal += children[i].nsecsTotal;
152 }
153 }
154
155
156
pydumpnode157 void pydump(ostream& out) {
158 #ifdef DEBUG_FRAME
159 string name = func ? func->name : "<top level>";
160 #else
161 string name = "";
162 #endif
163
164 out << "dict(\n"
165 << " name = '" << name << " " << func << "',\n"
166 << " pos = '" << positionFromLambda(func) << "',\n"
167 << " calls = " << calls << ",\n"
168 << " instructions = " << instructions << ",\n"
169 << " nsecs = " << nsecs << ",\n"
170 << " children = [\n";
171
172 size_t n = children.size();
173 for (size_t i = 0; i < n; ++i) {
174 children[i].pydump(out);
175 out << ",\n";
176 }
177
178 out << " ])\n";
179 }
180 };
181
182 // An empty call stack.
183 node emptynode;
184
185 // All of the callstacks.
186 std::stack<node *> callstack;
187
topnode()188 node &topnode() {
189 return *callstack.top();
190 }
191
192 // Arc representing one function calling another. Used only for building
193 // the output for kcachegrind.
194 struct arc : public gc {
195 int calls;
196 int instTotal;
197 long long nsecsTotal;
198
arcarc199 arc() : calls(0), instTotal(0), nsecsTotal(0) {}
200
addarc201 void add(node& n) {
202 calls += n.calls;
203 instTotal += n.instTotal;
204 nsecsTotal += n.nsecsTotal;
205 }
206 };
207
208 // Representing one function and its calls to other functions.
209 struct fun : public gc {
210 int instructions;
211 long long nsecs;
212 mem::map<lambda *, arc> arcs;
213 mem::map<bltin, arc> carcs;
214
funfun215 fun() : instructions(0), nsecs(0) {}
216
addChildTimefun217 void addChildTime(node& n) {
218 if (n.cfunc)
219 carcs[n.cfunc].add(n);
220 else
221 arcs[n.func].add(n);
222 }
223
analysefun224 void analyse(node& n) {
225 instructions += n.instructions;
226 nsecs += n.nsecs;
227 size_t numChildren = n.children.size();
228 for (size_t i = 0; i < numChildren; ++i)
229 addChildTime(n.children[i]);
230 }
231
dumpfun232 void dump(ostream& out) {
233 // The unused line number needed by kcachegrind.
234 static const string POS = "1";
235
236 out << POS << " " << instructions << " " << nsecs << "\n";
237 for (mem::map<lambda *, arc>::iterator i = arcs.begin();
238 i != arcs.end();
239 ++i)
240 {
241 lambda *l = i->first;
242 arc& a = i->second;
243
244 out << "cfl=" << positionFromLambda(l) << "\n";
245
246 out << "cfn=";
247 printNameFromLambda(out, l);
248 out << "\n";
249
250 out << "calls=" << a.calls << " " << POS << "\n";
251 out << POS << " " << a.instTotal << " " << a.nsecsTotal << "\n";
252 }
253 for (mem::map<bltin, arc>::iterator i = carcs.begin();
254 i != carcs.end();
255 ++i)
256 {
257 bltin b = i->first;
258 arc& a = i->second;
259
260 out << "cfl=C++ code" << endl;
261
262 out << "cfn=";
263 printNameFromBltin(out, b);
264 out << "\n";
265
266 out << "calls=" << a.calls << " " << POS << "\n";
267 out << POS << " " << a.instTotal << " " << a.nsecsTotal << "\n";
268 }
269 }
270 };
271
272 // The data for each asymptote function.
273 mem::map<lambda *, fun> funs;
274
275 // The data for each C++ function.
276 mem::map<bltin, fun> cfuns;
277
analyseNode(node & n)278 void analyseNode(node& n) {
279 fun& f = n.cfunc ? cfuns[n.cfunc] :
280 funs[n.func];
281
282 f.analyse(n);
283
284 size_t numChildren = n.children.size();
285 for (size_t i = 0; i < numChildren; ++i)
286 analyseNode(n.children[i]);
287 }
288
289 // Convert data in the tree of callstack nodes into data for each function.
analyseData()290 void analyseData() {
291 emptynode.computeTotals();
292 analyseNode(emptynode);
293 }
294
295
296 // Timing data.
297 struct timeval timestamp;
298
startLap()299 void startLap() {
300 gettimeofday(×tamp, 0);
301 }
302
timeAndResetLap()303 long long timeAndResetLap() {
304 struct timeval now;
305 gettimeofday(&now, 0);
306 long long nsecs = 1000000000LL * (now.tv_sec - timestamp.tv_sec) +
307 1000LL * (now.tv_usec - timestamp.tv_usec);
308 timestamp = now;
309 return nsecs;
310 }
311
312 // Called whenever the stack is about to change, in order to record the time
313 // duration for the current node.
recordTime()314 void recordTime() {
315 topnode().nsecs += timeAndResetLap();
316 }
317
318 public:
319 profiler();
320
321 void beginFunction(lambda *func);
322 void endFunction(lambda *func);
323 void beginFunction(bltin func);
324 void endFunction(bltin func);
325 void recordInstruction();
326
327 // TODO: Add position, type of instruction info to profiling.
328
329 // Dump all of the data out in a format that can be read into Python.
330 void pydump(ostream &out);
331
332 // Dump all of the data in a format for kcachegrind.
333 void dump(ostream& out);
334
335 };
336
profiler()337 inline profiler::profiler()
338 : emptynode()
339 {
340 callstack.push(&emptynode);
341 startLap();
342 }
343
beginFunction(lambda * func)344 inline void profiler::beginFunction(lambda *func) {
345 assert(func);
346 assert(!callstack.empty());
347
348 recordTime();
349
350 callstack.push(topnode().getChild(func));
351 ++topnode().calls;
352 }
353
endFunction(lambda * func)354 inline void profiler::endFunction(lambda *func) {
355 assert(func);
356 assert(!callstack.empty());
357 assert(topnode().func == func);
358
359 recordTime();
360
361 callstack.pop();
362 }
363
beginFunction(bltin cfunc)364 inline void profiler::beginFunction(bltin cfunc) {
365 assert(cfunc);
366 assert(!callstack.empty());
367
368 recordTime();
369
370 callstack.push(topnode().getChild(cfunc));
371 ++topnode().calls;
372 }
373
endFunction(bltin cfunc)374 inline void profiler::endFunction(bltin cfunc) {
375 assert(cfunc);
376 assert(!callstack.empty());
377 assert(topnode().cfunc == cfunc);
378
379 recordTime();
380
381 callstack.pop();
382 }
383
recordInstruction()384 inline void profiler::recordInstruction() {
385 assert(!callstack.empty());
386 ++topnode().instructions;
387 }
388
pydump(ostream & out)389 inline void profiler::pydump(ostream& out) {
390 out << "profile = ";
391 emptynode.pydump(out);
392 }
393
dump(ostream & out)394 inline void profiler::dump(ostream& out) {
395 analyseData();
396
397 out << "events: Instructions Nanoseconds\n";
398
399 for (mem::map<lambda *, fun>::iterator i = funs.begin();
400 i != funs.end();
401 ++i)
402 {
403 lambda *l = i->first;
404 fun& f = i->second;
405
406 out << "fl=" << positionFromLambda(l) << "\n";
407
408 out << "fn=";
409 printNameFromLambda(out, l);
410 out << "\n";
411
412 f.dump(out);
413 }
414 for (mem::map<bltin, fun>::iterator i = cfuns.begin();
415 i != cfuns.end();
416 ++i)
417 {
418 bltin b = i->first;
419 fun& f = i->second;
420
421 out << "fl=C++ code\n";
422
423 out << "fn=";
424 printNameFromBltin(out, b);
425 out << "\n";
426
427 f.dump(out);
428 }
429 }
430
431
432 } // namespace vm
433
434 #endif // PROFILER_H
435