1 /*****
2  * profiler.h
3  * Andy Hammerlindl 2010/07/24
4  *
5  * Profiler for the execution of the virtual machine bytecode.
6  *****/
7 
8 #ifndef PROFILER_H
9 #define PROFILER_H
10 
11 #include <sys/time.h>
12 
13 #include <iostream>
14 
15 #include "inst.h"
16 
17 namespace vm {
18 
19 #ifdef DEBUG_BLTIN
20 string lookupBltin(bltin b);
21 #endif
22 
positionFromLambda(lambda * func)23 inline position positionFromLambda(lambda *func) {
24   if (func == 0)
25     return position();
26 
27   program& code = *func->code;
28 
29   // Check for empty program.
30   if (code.begin() == code.end())
31     return position();
32 
33   return code.begin()->pos;
34 }
35 
printNameFromLambda(ostream & out,lambda * func)36 inline void printNameFromLambda(ostream& out, lambda *func) {
37   if (!func) {
38     out << "<top level>";
39     return;
40   }
41 
42 #ifdef DEBUG_FRAME
43   string name = func->name;
44 #else
45   string name = "";
46 #endif
47 
48   // If unnamed, use the pointer address.
49   if (name.empty())
50     out << func;
51   else
52     out << name;
53 
54   out << " at ";
55   positionFromLambda(func).printTerse(out);
56 }
57 
printNameFromBltin(ostream & out,bltin b)58 inline void printNameFromBltin(ostream& out, bltin b) {
59 #ifdef DEBUG_BLTIN
60   string name = lookupBltin(b);
61 #else
62   string name = "";
63 #endif
64 
65   if (!name.empty())
66     out << name << " ";
67   out << "(builtin at " << (void *)b << ")";
68 }
69 
70 class profiler : public gc {
71   // To do call graph analysis, each call stack that occurs in practice is
72   // represented by a node.  For instance, if f and g are functions, then
73   //   f -> g -> g
74   // is represented by a node and
75   //   g -> f -> g
76   // is represented by a different one.
77   struct node {
78     // The top-level function of the call stack.  It is either an asymptote
79     // function with a given lambda, or a builtin function, with a given
80     // bltin.
81     lambda *func;
82     bltin cfunc;
83 
84     // The number of times the top-level function has been called resulting in
85     // this specific call stack.
86     int calls;
87 
88     // The number of bytecode instructions executed with this exact call stack.
89     // It does not include time spent in called function.
90     int instructions;
91 
92     // Number of instructions spent in this function or its children.  This is
93     // computed by computeTotals.
94     int instTotal;
95 
96     // The number of real-time nanoseconds spent in this node.  WARNING: May
97     // be wildly inaccurate.
98     long long nsecs;
99 
100     // Total including children.
101     long long nsecsTotal;
102 
103     // Call stacks resulting from calls during this call stack.
104     mem::vector<node> children;
105 
nodenode106     node()
107       : func(0), cfunc(0), calls(0),
108         instructions(0), instTotal(0),
109         nsecs(0), nsecsTotal(0) {}
110 
nodenode111     node(lambda *func)
112       : func(func), cfunc(0), calls(0),
113         instructions(0), instTotal(0),
114         nsecs(0), nsecsTotal(0) {}
115 
nodenode116     node(bltin b)
117       : func(0), cfunc(b), calls(0),
118         instructions(0), instTotal(0),
119         nsecs(0), nsecsTotal(0) {}
120 
121     // Return the call stack resulting from a call to func when this call
122     // stack is current.
getChildnode123     node *getChild(lambda *func) {
124       size_t n = children.size();
125       for (size_t i = 0; i < n; ++i)
126         if (children[i].func == func)
127           return &children[i];
128 
129       // Not found, create a new one.
130       children.push_back(node(func));
131       return &children.back();
132     }
getChildnode133     node *getChild(bltin func) {
134       size_t n = children.size();
135       for (size_t i = 0; i < n; ++i)
136         if (children[i].cfunc == func)
137           return &children[i];
138 
139       // Not found, create a new one.
140       children.push_back(node(func));
141       return &children.back();
142     }
143 
computeTotalsnode144     void computeTotals() {
145       instTotal = instructions;
146       nsecsTotal = nsecs;
147       size_t n = children.size();
148       for (size_t i = 0; i < n; ++i) {
149         children[i].computeTotals();
150         instTotal += children[i].instTotal;
151         nsecsTotal += children[i].nsecsTotal;
152       }
153     }
154 
155 
156 
pydumpnode157     void pydump(ostream& out) {
158 #ifdef DEBUG_FRAME
159       string name = func ? func->name : "<top level>";
160 #else
161       string name = "";
162 #endif
163 
164       out << "dict(\n"
165            << "    name = '" << name << " " << func << "',\n"
166            << "    pos = '" << positionFromLambda(func) << "',\n"
167            << "    calls = " << calls << ",\n"
168            << "    instructions = " << instructions << ",\n"
169            << "    nsecs = " << nsecs << ",\n"
170            << "    children = [\n";
171 
172       size_t n = children.size();
173       for (size_t i = 0; i < n; ++i) {
174         children[i].pydump(out);
175         out << ",\n";
176       }
177 
178       out << "    ])\n";
179     }
180   };
181 
182   // An empty call stack.
183   node emptynode;
184 
185   // All of the callstacks.
186   std::stack<node *> callstack;
187 
topnode()188   node &topnode() {
189     return *callstack.top();
190   }
191 
192   // Arc representing one function calling another.  Used only for building
193   // the output for kcachegrind.
194   struct arc : public gc {
195     int calls;
196     int instTotal;
197     long long nsecsTotal;
198 
arcarc199     arc() : calls(0), instTotal(0), nsecsTotal(0) {}
200 
addarc201     void add(node& n) {
202       calls += n.calls;
203       instTotal += n.instTotal;
204       nsecsTotal += n.nsecsTotal;
205     }
206   };
207 
208   // Representing one function and its calls to other functions.
209   struct fun : public gc {
210     int instructions;
211     long long nsecs;
212     mem::map<lambda *, arc> arcs;
213     mem::map<bltin, arc> carcs;
214 
funfun215     fun() : instructions(0), nsecs(0) {}
216 
addChildTimefun217     void addChildTime(node& n) {
218       if (n.cfunc)
219         carcs[n.cfunc].add(n);
220       else
221         arcs[n.func].add(n);
222     }
223 
analysefun224     void analyse(node& n) {
225       instructions += n.instructions;
226       nsecs += n.nsecs;
227       size_t numChildren = n.children.size();
228       for (size_t i = 0; i < numChildren; ++i)
229         addChildTime(n.children[i]);
230     }
231 
dumpfun232     void dump(ostream& out) {
233       // The unused line number needed by kcachegrind.
234       static const string POS = "1";
235 
236       out << POS << " " << instructions << " " << nsecs << "\n";
237       for (mem::map<lambda *, arc>::iterator i = arcs.begin();
238            i != arcs.end();
239            ++i)
240       {
241         lambda *l = i->first;
242         arc& a = i->second;
243 
244         out << "cfl=" << positionFromLambda(l) << "\n";
245 
246         out << "cfn=";
247         printNameFromLambda(out, l);
248         out << "\n";
249 
250         out << "calls=" << a.calls << " " << POS << "\n";
251         out << POS << " " << a.instTotal << " " << a.nsecsTotal << "\n";
252       }
253       for (mem::map<bltin, arc>::iterator i = carcs.begin();
254            i != carcs.end();
255            ++i)
256       {
257         bltin b = i->first;
258         arc& a = i->second;
259 
260         out << "cfl=C++ code" << endl;
261 
262         out << "cfn=";
263         printNameFromBltin(out, b);
264         out << "\n";
265 
266         out << "calls=" << a.calls << " " << POS << "\n";
267         out << POS << " " << a.instTotal << " " << a.nsecsTotal << "\n";
268       }
269     }
270   };
271 
272   // The data for each asymptote function.
273   mem::map<lambda *, fun> funs;
274 
275   // The data for each C++ function.
276   mem::map<bltin, fun> cfuns;
277 
analyseNode(node & n)278   void analyseNode(node& n) {
279     fun& f = n.cfunc ? cfuns[n.cfunc] :
280                        funs[n.func];
281 
282     f.analyse(n);
283 
284     size_t numChildren = n.children.size();
285     for (size_t i = 0; i < numChildren; ++i)
286       analyseNode(n.children[i]);
287   }
288 
289   // Convert data in the tree of callstack nodes into data for each function.
analyseData()290   void analyseData() {
291     emptynode.computeTotals();
292     analyseNode(emptynode);
293   }
294 
295 
296   // Timing data.
297   struct timeval timestamp;
298 
startLap()299   void startLap() {
300     gettimeofday(&timestamp, 0);
301   }
302 
timeAndResetLap()303   long long timeAndResetLap() {
304     struct timeval now;
305     gettimeofday(&now, 0);
306     long long nsecs = 1000000000LL * (now.tv_sec - timestamp.tv_sec) +
307                       1000LL * (now.tv_usec - timestamp.tv_usec);
308     timestamp = now;
309     return nsecs;
310   }
311 
312   // Called whenever the stack is about to change, in order to record the time
313   // duration for the current node.
recordTime()314   void recordTime() {
315     topnode().nsecs += timeAndResetLap();
316   }
317 
318 public:
319   profiler();
320 
321   void beginFunction(lambda *func);
322   void endFunction(lambda *func);
323   void beginFunction(bltin func);
324   void endFunction(bltin func);
325   void recordInstruction();
326 
327   // TODO: Add position, type of instruction info to profiling.
328 
329   // Dump all of the data out in a format that can be read into Python.
330   void pydump(ostream &out);
331 
332   // Dump all of the data in a format for kcachegrind.
333   void dump(ostream& out);
334 
335 };
336 
profiler()337 inline profiler::profiler()
338   : emptynode()
339 {
340     callstack.push(&emptynode);
341     startLap();
342 }
343 
beginFunction(lambda * func)344 inline void profiler::beginFunction(lambda *func) {
345   assert(func);
346   assert(!callstack.empty());
347 
348   recordTime();
349 
350   callstack.push(topnode().getChild(func));
351   ++topnode().calls;
352 }
353 
endFunction(lambda * func)354 inline void profiler::endFunction(lambda *func) {
355   assert(func);
356   assert(!callstack.empty());
357   assert(topnode().func == func);
358 
359   recordTime();
360 
361   callstack.pop();
362 }
363 
beginFunction(bltin cfunc)364 inline void profiler::beginFunction(bltin cfunc) {
365   assert(cfunc);
366   assert(!callstack.empty());
367 
368   recordTime();
369 
370   callstack.push(topnode().getChild(cfunc));
371   ++topnode().calls;
372 }
373 
endFunction(bltin cfunc)374 inline void profiler::endFunction(bltin cfunc) {
375   assert(cfunc);
376   assert(!callstack.empty());
377   assert(topnode().cfunc == cfunc);
378 
379   recordTime();
380 
381   callstack.pop();
382 }
383 
recordInstruction()384 inline void profiler::recordInstruction() {
385   assert(!callstack.empty());
386   ++topnode().instructions;
387 }
388 
pydump(ostream & out)389 inline void profiler::pydump(ostream& out) {
390   out << "profile = ";
391   emptynode.pydump(out);
392 }
393 
dump(ostream & out)394 inline void profiler::dump(ostream& out) {
395   analyseData();
396 
397   out << "events: Instructions Nanoseconds\n";
398 
399   for (mem::map<lambda *, fun>::iterator i = funs.begin();
400        i != funs.end();
401        ++i)
402   {
403     lambda *l = i->first;
404     fun& f = i->second;
405 
406     out << "fl=" << positionFromLambda(l) << "\n";
407 
408     out << "fn=";
409     printNameFromLambda(out, l);
410     out << "\n";
411 
412     f.dump(out);
413   }
414   for (mem::map<bltin, fun>::iterator i = cfuns.begin();
415        i != cfuns.end();
416        ++i)
417   {
418     bltin b = i->first;
419     fun& f = i->second;
420 
421     out << "fl=C++ code\n";
422 
423     out << "fn=";
424     printNameFromBltin(out, b);
425     out << "\n";
426 
427     f.dump(out);
428   }
429 }
430 
431 
432 } // namespace vm
433 
434 #endif // PROFILER_H
435