1 
2 /*--------------------------------------------------------------------*/
3 /*--- An xtree, tree of stacktraces with data            m_xtree.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2016-2017 Philippe Waroquiers
11 
12    This code generalises the XTree idea that was implemented in
13    the massif tool in Valgrind versions <= 3.12, which is
14       Copyright (C) 2005-2017 Nicholas Nethercote
15        njn@valgrind.org
16 
17    The XTree implementation in this file is however implemented completely
18    differently. Some code has been re-used for the production of the
19    massif file header (e.g. FP_cmd function).
20 
21    This program is free software; you can redistribute it and/or
22    modify it under the terms of the GNU General Public License as
23    published by the Free Software Foundation; either version 2 of the
24    License, or (at your option) any later version.
25 
26    This program is distributed in the hope that it will be useful, but
27    WITHOUT ANY WARRANTY; without even the implied warranty of
28    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
29    General Public License for more details.
30 
31    You should have received a copy of the GNU General Public License
32    along with this program; if not, write to the Free Software
33    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
34    02111-1307, USA.
35 
36    The GNU General Public License is contained in the file COPYING.
37 */
38 
39 #include "pub_core_basics.h"
40 #include "pub_core_debuglog.h"
41 #include "pub_core_clientstate.h"
42 #include "pub_core_stacktrace.h"
43 #include "pub_core_execontext.h"
44 #include "pub_core_libcbase.h"
45 #include "pub_core_libcassert.h"
46 #include "pub_core_libcfile.h"
47 #include "pub_core_libcprint.h"
48 #include "pub_core_libcproc.h"
49 #include "pub_core_hashtable.h"
50 #include "pub_core_mallocfree.h"
51 #include "pub_core_options.h"
52 #include "pub_core_debuginfo.h"
53 #include "pub_core_deduppoolalloc.h"
54 #include "pub_core_xtree.h"    /* self */
55 
/* Emit a debug message via VG_(dmsg) only when the current debuglog
   verbosity is at least 'level'; otherwise evaluates to 0 so the macro
   can be used in expression position. */
#define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ?         \
                          VG_(dmsg)(__VA_ARGS__)                      \
                          : 0)
59 
/* Defines the relevant part of an ec. This is shared between an xt
   and its snapshots (see XT_shared XArray of xec). */
typedef
   struct _xec {
     ExeContext* ec;    // Underlying execontext this entry describes.
     UShort top;        // The first ips of ec to take into account.
     UShort n_ips_sel;  // The nr of ips from top to take into account.
   }
   xec;
69 
/* XT_shared maintains the information shared between an XT and all
   its snapshots.  It is reference-counted (see nrRef) and freed when
   the last referencing XTree is deleted. */
typedef
   struct _XT_shared {
      UWord nrRef; /* nr of XTrees referencing this shared memory. */

      Alloc_Fn_t alloc_fn;                /* alloc fn (nofail) */
      const HChar* cc;                    /* cost centre for alloc */
      Free_Fn_t free_fn;                  /* free fn */

      /* The data associated to each ec is stored in 2 arrays:
           an xec array, shared between an xt and all its snapshots.
           a  data array, private to each XTree.
         For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in
         xec and data arrays where the ec information is located (this
         indirection is used to avoid huge xec and data arrays, in
         case an XTree contains data only for a small number of ec.
         The offset in the xec and data array is used as xtree ec unique
         id i.e. an xecu. */

      UInt  d4ecu2xecu_sz; /* size of d4ecu2xecu (in nr of elements). */
      UInt* d4ecu2xecu;

      /* ec information common to an xt and its snapshots. */
      XArray* xec; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */

      /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1].
         See ips_order_cmp. */
      XArray* ips_order_xecu;
   } XT_shared;
100 
/* NO_OFFSET indicates in d4ecu2xecu  there is no data (yet) for this ec
   (with the index ecu/4). */
#define NO_OFFSET 0xffffffff
104 
new_XT_shared(Alloc_Fn_t alloc_fn,const HChar * cc,void (* free_fn)(void *))105 static XT_shared* new_XT_shared (Alloc_Fn_t alloc_fn,
106                                  const  HChar* cc,
107                                  void   (*free_fn)(void*))
108 {
109    XT_shared* shared;
110 
111    vg_assert(alloc_fn);
112    vg_assert(cc);
113    vg_assert(free_fn);
114    shared = alloc_fn(cc, sizeof(*shared));
115    shared->nrRef = 0;
116    shared->alloc_fn = alloc_fn;
117    shared->cc = cc;
118    shared->free_fn = free_fn;
119 
120    shared->d4ecu2xecu_sz = 0;
121    shared->d4ecu2xecu = NULL;
122    shared->xec = VG_(newXA)(alloc_fn, cc, free_fn, sizeof(xec));
123    shared->ips_order_xecu = NULL; // Allocated when needed.
124 
125    return shared;
126 }
127 
/* Free all memory owned by 'shared'.  Must only be called once the last
   reference has been dropped (i.e. nrRef == 0). */
static void delete_XT_shared (XT_shared* shared)
{
   vg_assert(shared->nrRef == 0);

   shared->free_fn(shared->d4ecu2xecu);
   VG_(deleteXA)(shared->xec);
   if (shared->ips_order_xecu)
      VG_(deleteXA)(shared->ips_order_xecu);
   shared->free_fn(shared);
}
137 
138 /* Compare 2 entries in ips_order_xecu by StackTrace elements.
139    In case stack traces are of different length, an 'absent' ips is
140    considered smaller than any other address. */
141 static XArray* xec_data_for_sort; // Needed to translate an xecu into an xec
ips_order_cmp(const void * vleft,const void * vright)142 static Int ips_order_cmp(const void* vleft, const void* vright)
143 {
144    const Xecu left_xecu  = *(const Xecu*)vleft;
145    const Xecu right_xecu = *(const Xecu*)vright;
146    const xec* left  = VG_(indexXA)(xec_data_for_sort, left_xecu);
147    const xec* right = VG_(indexXA)(xec_data_for_sort, right_xecu);
148    const StackTrace left_ips  = VG_(get_ExeContext_StackTrace)(left->ec)
149       + left->top;
150    const StackTrace right_ips = VG_(get_ExeContext_StackTrace)(right->ec)
151       + right->top;
152    UInt i;
153 
154    const UInt c_n_ips_sel = left->n_ips_sel < right->n_ips_sel
155       ? left->n_ips_sel : right->n_ips_sel;
156 
157    // First see if we have a difference on the common nr of ips selected
158    for (i = 0; i < c_n_ips_sel; i++) {
159       if (left_ips[i] == right_ips[i]) continue;
160       if (left_ips[i] < right_ips[i]) return -1;
161       return  1;
162    }
163    // Common part is equal => compare lengths.
164    if (left->n_ips_sel < right->n_ips_sel) return -1;
165    if (left->n_ips_sel > right->n_ips_sel) return  1;
166    return 0;
167 }
168 
169 // If needed, build or refresh shared->ips_order_xecu
ensure_ips_order_xecu_valid(XT_shared * shared)170 static void ensure_ips_order_xecu_valid(XT_shared* shared)
171 {
172    UInt i;
173    UInt n_xecu;
174 
175    if (shared->ips_order_xecu == NULL) {
176       shared->ips_order_xecu = VG_(newXA)(shared->alloc_fn, shared->cc,
177                                           shared->free_fn, sizeof(Xecu));
178       VG_(hintSizeXA)(shared->ips_order_xecu, VG_(sizeXA)(shared->xec));
179       VG_(setCmpFnXA)(shared->ips_order_xecu, ips_order_cmp);
180    }
181 
182    if (VG_(sizeXA)(shared->xec) == VG_(sizeXA)(shared->ips_order_xecu))
183       return;
184 
185    n_xecu = VG_(sizeXA)(shared->xec);
186    for (i = VG_(sizeXA)(shared->ips_order_xecu); i < n_xecu; i++)
187       VG_(addToXA)(shared->ips_order_xecu, &i);
188 
189    xec_data_for_sort = shared->xec;
190    VG_(sortXA)(shared->ips_order_xecu);
191 }
192 
/* Take one more reference on 'shared'. */
static void addRef_XT_shared (XT_shared* shared)
{
   shared->nrRef += 1;
}
197 
release_XT_shared(XT_shared * shared)198 static UWord release_XT_shared(XT_shared* shared)
199 {
200    UWord nrRef;
201 
202    vg_assert(shared->nrRef > 0);
203    nrRef = --shared->nrRef;
204    if (nrRef == 0)
205       delete_XT_shared(shared);
206    return nrRef;
207 }
208 
209 
/* An XTree associates a client-defined data element (of dataSzB bytes)
   to each recorded ExeContext.  The xec information is shared with any
   snapshots (see XT_shared); the data array is private to this tree. */
struct _XTree {
   Alloc_Fn_t alloc_fn;                /* alloc fn (nofail) */
   const HChar* cc;                    /* cost centre for alloc */
   Free_Fn_t free_fn;                  /* free fn */
   Word  dataSzB;   /* data size in bytes */
   XT_init_data_t init_data_fn;   /* initialises a new data element. */
   XT_add_data_t add_data_fn;     /* adds a value into a data element. */
   XT_sub_data_t sub_data_fn;     /* subtracts a value from a data element. */
   XT_filter_IPs_t filter_IPs_fn; /* optional (may be NULL): selects the
                                     ips [top, top+n_ips_sel) of an ec. */

   XT_shared* shared;

   HChar* tmp_data; /* temporary buffer, to insert new elements. */
   XArray* data; /* of elements of size dataSzB */
};
225 
226 
VG_(XT_create)227 XTree* VG_(XT_create) ( Alloc_Fn_t alloc_fn,
228                         const HChar* cc,
229                         Free_Fn_t free_fn,
230                         Word dataSzB,
231                         XT_init_data_t init_data_fn,
232                         XT_add_data_t add_data_fn,
233                         XT_sub_data_t sub_data_fn,
234                         XT_filter_IPs_t filter_IPs_fn)
235 {
236    XTree* xt;
237 
238    /* check user-supplied info .. */
239    vg_assert(alloc_fn);
240    vg_assert(free_fn);
241    vg_assert(dataSzB >= 0);
242    vg_assert(init_data_fn);
243    vg_assert(add_data_fn);
244    vg_assert(sub_data_fn);
245 
246    xt = alloc_fn(cc, sizeof(struct _XTree) );
247    xt->alloc_fn  = alloc_fn;
248    xt->cc        = cc;
249    xt->free_fn   = free_fn;
250    xt->dataSzB   = dataSzB;
251    xt->init_data_fn = init_data_fn;
252    xt->add_data_fn = add_data_fn;
253    xt->sub_data_fn = sub_data_fn;
254    xt->filter_IPs_fn = filter_IPs_fn;
255 
256    xt->shared = new_XT_shared(alloc_fn, cc, free_fn);
257    addRef_XT_shared(xt->shared);
258    xt->tmp_data = alloc_fn(cc, xt->dataSzB);
259    xt->data =  VG_(newXA)(alloc_fn, cc, free_fn, dataSzB);
260 
261    return xt;
262 }
263 
VG_(XT_snapshot)264 XTree* VG_(XT_snapshot)(XTree* xt)
265 {
266    XTree* nxt;
267 
268    vg_assert(xt);
269 
270    nxt = xt->alloc_fn(xt->cc, sizeof(struct _XTree) );
271 
272    *nxt = *xt;
273    addRef_XT_shared(nxt->shared);
274    nxt->tmp_data = nxt->alloc_fn(nxt->cc, nxt->dataSzB);
275    nxt->data = VG_(cloneXA)(nxt->cc, xt->data);
276 
277    return nxt;
278 }
279 
VG_(XT_delete)280 void VG_(XT_delete) ( XTree* xt )
281 {
282    vg_assert(xt);
283 
284    release_XT_shared(xt->shared);
285    xt->free_fn(xt->tmp_data);
286    VG_(deleteXA)(xt->data);
287    xt->free_fn(xt);
288 }
289 
find_or_insert(XTree * xt,ExeContext * ec)290 static Xecu find_or_insert (XTree* xt, ExeContext* ec)
291 {
292 
293    const UInt d4ecu = VG_(get_ECU_from_ExeContext)(ec) / 4;
294    XT_shared* shared = xt->shared;
295 
296    /* First grow the d4ecu2xecu array if needed. */
297    if (d4ecu >= shared->d4ecu2xecu_sz) {
298       UInt old_sz = shared->d4ecu2xecu_sz;
299       UInt new_sz = (3 * d4ecu) / 2;
300 
301       if (new_sz < 1000)
302          new_sz = 1000;
303       shared->d4ecu2xecu = VG_(realloc)(xt->cc, shared->d4ecu2xecu,
304                                         new_sz * sizeof(UInt));
305       shared->d4ecu2xecu_sz = new_sz;
306       for (UInt i = old_sz; i < new_sz; i++)
307          shared->d4ecu2xecu[i] = NO_OFFSET;
308    }
309 
310    if (shared->d4ecu2xecu[d4ecu] == NO_OFFSET) {
311       xec xe;
312 
313       xe.ec = ec;
314       if (xt->filter_IPs_fn == NULL) {
315          xe.top = 0;
316          xe.n_ips_sel = (UShort)VG_(get_ExeContext_n_ips)(xe.ec);
317       } else {
318          UInt top;
319          UInt n_ips_sel = VG_(get_ExeContext_n_ips)(xe.ec);
320          xt->filter_IPs_fn(VG_(get_ExeContext_StackTrace)(xe.ec), n_ips_sel,
321                            &top, &n_ips_sel);
322          xe.top = (UShort)top;
323          xe.n_ips_sel = (UShort)n_ips_sel;
324       }
325       xt->init_data_fn(xt->tmp_data);
326       VG_(addToXA)(shared->xec, &xe);
327       shared->d4ecu2xecu[d4ecu] = (UInt)VG_(addToXA)(xt->data, xt->tmp_data);
328    }
329 
330    return shared->d4ecu2xecu[d4ecu];
331 }
332 
VG_(XT_add_to_ec)333 Xecu VG_(XT_add_to_ec) (XTree* xt, ExeContext* ec, const void* value)
334 {
335    Xecu xecu = find_or_insert(xt, ec);
336    void* data = VG_(indexXA)(xt->data, xecu);
337 
338    xt->add_data_fn(data, value);
339    return xecu;
340 }
341 
VG_(XT_sub_from_ec)342 Xecu VG_(XT_sub_from_ec) (XTree* xt, ExeContext* ec, const void* value)
343 {
344    Xecu xecu = find_or_insert(xt, ec);
345    void* data = VG_(indexXA)(xt->data, xecu);
346 
347    xt->sub_data_fn(data, value);
348    return xecu;
349 }
350 
VG_(XT_add_to_xecu)351 void VG_(XT_add_to_xecu) (XTree* xt, Xecu xecu, const void* value)
352 {
353    void* data = VG_(indexXA)(xt->data, xecu);
354    xt->add_data_fn(data, value);
355 }
356 
VG_(XT_sub_from_xecu)357 void VG_(XT_sub_from_xecu) (XTree* xt, Xecu xecu, const void* value)
358 {
359    void* data = VG_(indexXA)(xt->data, xecu);
360    xt->sub_data_fn(data, value);
361 }
362 
VG_(XT_n_ips_sel)363 UInt VG_(XT_n_ips_sel) (XTree* xt, Xecu xecu)
364 {
365    xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
366    return (UInt)xe->n_ips_sel;
367 }
368 
VG_(XT_get_ec_from_xecu)369 ExeContext* VG_(XT_get_ec_from_xecu) (XTree* xt, Xecu xecu)
370 {
371    xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
372    return xe->ec;
373 }
374 
xt_open(const HChar * outfilename)375 static VgFile* xt_open (const HChar* outfilename)
376 {
377    VgFile* fp;
378 
379    fp = VG_(fopen)(outfilename, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
380                    VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IROTH);
381    if (fp == NULL) {
382       VG_(message)(Vg_UserMsg,
383                    "Error: can not open xtree output file `%s'\n",
384                    outfilename);
385    }
386    return fp;
387 }
388 
389 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
390 
391 // Print "cmd:" line.
FP_cmd(VgFile * fp)392 static void FP_cmd(VgFile* fp)
393 {
394    UInt i;
395 
396    FP("cmd: ");
397    FP("%s", VG_(args_the_exename));
398    for (i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
399       HChar* arg = * (HChar**) VG_(indexXA)(VG_(args_for_client), i);
400       FP(" %s", arg);
401    }
402    FP("\n");
403 }
404 
405 /* ----------- Callgrind output ------------------------------------------- */
406 
407 /* Output a callgrind format element in compressed format:
408      "name=(pos)" or "name=(pos) value" (if value_new)
409    or not compressed format: "name=value"
410    VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
411    name is the format element (e.g. fl, fn, cfi, cfn, ...).
412    pos is the value dictionary position, used for compressed format.
413    value_new is True if this is the first usage of value. */
FP_pos_str(VgFile * fp,const HChar * name,UInt pos,const HChar * value,Bool value_new)414 static void FP_pos_str(VgFile* fp, const HChar* name, UInt pos,
415                        const HChar* value, Bool value_new)
416 {
417    if (!VG_(clo_xtree_compress_strings))
418       FP("%s=%s\n", name, value);
419    else if (value_new)
420       FP("%s=(%d) %s\n", name, pos, value);
421    else
422       FP("%s=(%d)\n", name, pos);
423 }
424 
/* Print xt to outfilename in callgrind format.
   events is a comma-separated list of event names (possibly with ":"
   separated long names, stripped for the "events:" line).
   img_value produces the cost-line image for a data element, or NULL if
   the element must be skipped.  The function and file name strings are
   deduplicated (see fnname_ddpa/filename_ddpa) to support the compressed
   string format of FP_pos_str. */
void VG_(XT_callgrind_print)
     (XTree* xt,
      const HChar* outfilename,
      const HChar* events,
      const HChar* (*img_value)(const void* value))
{
   UInt n_xecu;
   XT_shared* shared = xt->shared;
   VgFile* fp = xt_open(outfilename);
   DedupPoolAlloc* fnname_ddpa;
   DedupPoolAlloc* filename_ddpa;
   HChar* filename_buf = NULL;  // Grown on demand to hold "dir/name".
   UInt filename_buf_size = 0;
   const HChar* filename_dir;
   const HChar* filename_name;

   if (fp == NULL)
      return;

   fnname_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
                                 "XT_callgrind_print.fn", xt->free_fn);
   filename_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
                                   "XT_callgrind_print.fl", xt->free_fn);

   FP("# callgrind format\n");
   FP("version: 1\n");
   FP("creator: xtree-1\n");
   FP("pid: %d\n", VG_(getpid)());
   FP_cmd(fp);

   /* Currently, we only need/support line positions. */
   FP("\npositions:%s\n", " line");

   /* Produce one "event:" line for each event, and the "events:" line. */
   {
      HChar strtok_events[VG_(strlen)(events)+1];
      HChar* e;
      HChar* ssaveptr;
      HChar* p;

      VG_(strcpy)(strtok_events, events);
      for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
           e != NULL;
           e = VG_(strtok_r)(NULL, ",", &ssaveptr))
         FP("event: %s\n", e);
      FP("events:");
      // Re-tokenise (strtok_r mutates its input) and strip any ":" long-name
      // suffix from each event for the "events:" summary line.
      VG_(strcpy)(strtok_events, events);
      for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
           e != NULL;
           e = VG_(strtok_r)(NULL, ",", &ssaveptr)) {
         p = e;
         while (*p) {
            if (*p == ':')
               *p = 0;
            p++;
         }
         FP(" %s", e);
      }
      FP("\n");
   }
   xt->init_data_fn(xt->tmp_data); // to compute totals

   n_xecu = VG_(sizeXA)(xt->data);
   vg_assert (n_xecu <= VG_(sizeXA)(shared->xec));
   for (Xecu xecu = 0; xecu < n_xecu; xecu++) {
      xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
      if (xe->n_ips_sel == 0)
         continue;

      const HChar* img = img_value(VG_(indexXA)(xt->data, xecu));

      // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
      // in the variables called_filename/called_linenum/called_fnname.
      // The booleans called_filename_new/called_fnname_new are set to True
      // the first time the called_filename/called_fnname are encountered.
      // The called_filename_nr/called_fnname_nr are numbers identifying
      // the strings  called_filename/called_fnname.
#define CALLED_FLF(n)                                                   \
      if ((n) < 0                                                       \
          || !VG_(get_filename_linenum)(ep, ips[(n)],                   \
                                        &filename_name,                 \
                                        &filename_dir,                  \
                                        &called_linenum)) {             \
         filename_name = "UnknownFile???";                              \
         called_linenum = 0;                                            \
      }                                                                 \
      if ((n) < 0                                                       \
          || !VG_(get_fnname)(ep, ips[(n)], &called_fnname)) {          \
         called_fnname = "UnknownFn???";                                \
      }                                                                 \
      {                                                                 \
         UInt needed_size = VG_(strlen)(filename_dir) + 1               \
            + VG_(strlen)(filename_name) + 1;                           \
         if (filename_buf_size < needed_size) {                         \
            filename_buf_size = needed_size;                            \
            filename_buf = VG_(realloc)(xt->cc, filename_buf,           \
                                        filename_buf_size);             \
         }                                                              \
      }                                                                 \
      VG_(strcpy)(filename_buf, filename_dir);                          \
      if (filename_buf[0] != '\0') {                                    \
         VG_(strcat)(filename_buf, "/");                                \
      }                                                                 \
      VG_(strcat)(filename_buf, filename_name);                         \
      called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa,          \
                                                filename_buf,           \
                                                &called_filename_new);  \
      called_filename = filename_buf;                                   \
      called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa,              \
                                              called_fnname,            \
                                              &called_fnname_new);

      /* Instead of unknown fnname ???, CALLED_FLF could use instead:
         VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
         but that creates a lot of (useless) nodes at least for
         valgrind self-hosting. */

      if (img) {
         const HChar* called_filename;
         UInt called_filename_nr;
         Bool called_filename_new; // True the first time we see this filename.
         const HChar* called_fnname;
         UInt called_fnname_nr;
         Bool called_fnname_new; // True the first time we see this fnname.
         UInt called_linenum;
         UInt prev_linenum;

         const Addr* ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
         const DiEpoch ep = VG_(get_ExeContext_epoch)(xe->ec);

         // Walk the stack trace from the outermost selected frame (the
         // bottom of the call chain) down to ips[0] (the innermost frame).
         Int ips_idx = xe->n_ips_sel - 1;

         if (0) {
            VG_(printf)("entry img %s\n", img);
            VG_(pp_ExeContext)(xe->ec);
            VG_(printf)("\n");
         }
         xt->add_data_fn(xt->tmp_data, VG_(indexXA)(xt->data, xecu));
         CALLED_FLF(ips_idx);
         for (;
              ips_idx >= 0;
              ips_idx--) {
            FP_pos_str(fp, "fl", called_filename_nr,
                       called_filename, called_filename_new);
            FP_pos_str(fp, "fn", called_fnname_nr,
                       called_fnname, called_fnname_new);
            // Only the innermost frame (ips[0]) carries the cost.
            if (ips_idx == 0)
               FP("%d %s\n", called_linenum, img);
            else
               FP("%d\n", called_linenum); //no self cost.
            prev_linenum = called_linenum;
            if (ips_idx >= 1) {
               // Emit the call arc to the next (inner) frame.
               CALLED_FLF(ips_idx-1);
               FP_pos_str(fp, "cfi", called_filename_nr,
                          called_filename, called_filename_new);
               FP_pos_str(fp, "cfn", called_fnname_nr,
                          called_fnname, called_fnname_new);
               called_filename_new = False;
               called_fnname_new = False;
               /* Giving a call count of 0 allows kcachegrind to hide the calls
                  column. A call count of 1 means kcachegrind would show in the
                  calls column the nr of stacktrace containing this arc, which
                  is very confusing. So, the less bad is to give a 0 call
                  count. */
               FP("calls=0 %d\n", called_linenum);
               FP("%d %s\n", prev_linenum, img);
            }
         }
         FP("\n");
      }
   }
   /* callgrind format is not really fully supporting (yet?) execution trees:
      in an execution tree, self and inclusive costs are identical, and
      cannot be added together.
      If no totals: line is given, callgrind_annotate calculates the addition
      of all costs, and so gives a wrong totals.
      Giving a totals: line solves this, but the user must give the option
      --inclusive=yes (kind of hack) to have all the functions given
      in the output file. */
   FP("totals: %s\n", img_value(xt->tmp_data));
   VG_(fclose)(fp);
   VG_(deleteDedupPA)(fnname_ddpa);
   VG_(deleteDedupPA)(filename_ddpa);
   VG_(free)(filename_buf);
}
610 
611 
612 /* ----------- Massif output ---------------------------------------------- */
613 
/* For Massif output, some functions from the execontext are not output, a.o.
   the allocation functions at the top of the stack and the functions below
   main. So, the StackTrace of the execontexts in the xtree must be filtered.
   Ms_Ec defines the subset of the stacktrace relevant for the report. */
typedef
   struct {
      StackTrace ips; // ips and n_ips provides the subset of the xtree ec
      UInt n_ips;     // to use for a massif report.

      SizeT report_value; // The value to report for this stack trace.
   } Ms_Ec;
625 
/* Ms_Group defines (at a certain depth) a group of ec context that
   have the same IPs at the given depth, and have the same 'parent'.
   total is the sum of the values of all group elements.
   A Ms_Group can also represent a set of ec contexts that do not
   have the same IP, but that have each a total which is below the
   significant size. Such a group has a NULL ms_ec, a zero group_io.
   n_ec is the nr of insignificant ec that have been collected inside this
   insignificant group, and total is the sum of all non significant ec
   at the given depth. */
typedef
   struct {
      Ms_Ec* ms_ec; // The group consists in ms_ec[0 .. n_ec-1]
      Addr group_ip; // Common ip at this depth (0 for the insig group).
      UInt n_ec;
      SizeT total;
   } Ms_Group;
642 
643 /* Compare 2 groups by total, to have bigger total first. */
ms_group_revcmp_total(const void * vleft,const void * vright)644 static Int ms_group_revcmp_total(const void* vleft, const void* vright)
645 {
646    const Ms_Group* left = (const Ms_Group*)vleft;
647    const Ms_Group* right = (const Ms_Group*)vright;
648 
649    // First reverse compare total
650    if (left->total > right->total) return -1;
651    if (left->total < right->total) return  1;
652 
653    /* Equal size => compare IPs.
654       This (somewhat?) helps to have deterministic test results.
655       If this would change between platforms, then we should compare
656       function names/filename/linenr */
657    if (left->group_ip < right->group_ip) return -1;
658    if (left->group_ip > right->group_ip) return  1;
659    return 0;
660 }
661 
/* Scan the addresses in ms_ec at the given depth.
   On return,
      *groups points to an array of Ms_Group sorted by total.
      *n_groups is the nr of groups
   The caller is responsible to free the allocated group array.
   NOTE(review): ms_ec is assumed sorted by stack trace addresses, so that
   equal ips[depth] entries are adjacent — presumably guaranteed by the
   caller via ips_order_xecu; verify against prepare_ms_ec. */
static void ms_make_groups (UInt depth, Ms_Ec* ms_ec, UInt n_ec, SizeT sig_sz,
                            UInt* n_groups, Ms_Group** groups)
{
   UInt i, g;
   Addr cur_group_ip = 0;

   *n_groups = 0;

   /* Handle special case somewhat more efficiently */
   if (n_ec == 0) {
      *groups = NULL;
      return;
   }

   /* Compute how many groups we have: a new group starts at each change
      of ips[depth] (entries shorter than depth are skipped). */
   for (i = 0; i < n_ec; i++) {
      if (ms_ec[i].n_ips > depth
          && (*n_groups == 0 || cur_group_ip != ms_ec[i].ips[depth])) {
         (*n_groups)++;
         cur_group_ip = ms_ec[i].ips[depth];
      }
   }

   /* make the group array. */
   *groups = VG_(malloc)("ms_make_groups", *n_groups * sizeof(Ms_Group));
   i = 0;
   for (g = 0; g < *n_groups; g++) {
      // Skip entries too short to appear at this depth.
      while (ms_ec[i].n_ips <= depth)
         i++;
      cur_group_ip = ms_ec[i].ips[depth];
      (*groups)[g].group_ip = cur_group_ip;
      (*groups)[g].ms_ec = &ms_ec[i];
      (*groups)[g].n_ec = 1;
      (*groups)[g].total = ms_ec[i].report_value;
      i++;
      // Accumulate the following entries sharing the same ip at this depth.
      while (i < n_ec
             && ms_ec[i].n_ips > depth
             && cur_group_ip == ms_ec[i].ips[depth]) {
         (*groups)[g].total += ms_ec[i].report_value;
         i++;
         (*groups)[g].n_ec++;
      }
   }

   /* Search for insignificant groups, collect them all together
      in the first insignificant group, and compact the group array. */
   {
      UInt insig1; // Position of first insignificant group.
      UInt n_insig = 0; // Nr of insignificant groups found.
      // insig1 is only read once n_insig > 0, i.e. after it has been
      // assigned below, so it is never used uninitialised.

      for (g = 0; g < *n_groups; g++) {
         if ((*groups)[g].total < sig_sz) {
            if (n_insig == 0) {
               // First insig group => transform it into the special group
               (*groups)[g].ms_ec = NULL;
               (*groups)[g].group_ip = 0;
               (*groups)[g].n_ec = 0;
               // start the sum of insig total as total
               insig1 = g;
            } else {
               // Add this insig group total into insig1 first group
               (*groups)[insig1].total += (*groups)[g].total;
            }
            n_insig++;
         } else {
            // Significant group: shift it left over the collapsed insig
            // groups (all but the first, which is kept as the collector).
            if (n_insig > 1)
               (*groups)[g - n_insig + 1] = (*groups)[g];
         }
      }
      if (n_insig > 0) {
         (*groups)[insig1].n_ec = n_insig;
         *n_groups -= n_insig - 1;
      }
      DMSG(1, "depth %u n_groups %u n_insig %u\n", depth, *n_groups, n_insig);
   }

   /* Sort on total size, bigger size first. */
   VG_(ssort)(*groups, *n_groups, sizeof(Ms_Group), ms_group_revcmp_total);
}
746 
747 /* Output the given group (located in an xtree at the given depth).
748    indent tells by how much to indent the information output for the group.
749    indent can be bigger than depth when outputting a group that is made
750    of one or more inlined calls: all inlined calls are output with the
751    same depth but with one more indent for each inlined call.  */
static void ms_output_group (VgFile* fp, UInt depth, UInt indent,
                             Ms_Group* group, SizeT sig_sz,
                             double sig_pct_threshold)
{
   UInt i;
   Ms_Group* groups;
   UInt n_groups;

   // If this is an insignificant group, handle it specially:
   // it is printed as a single "below threshold" leaf line (n0:),
   // with no children.
   if (group->ms_ec == NULL) {
      const HChar* s = ( 1 ==  group->n_ec? "," : "s, all" );
      vg_assert(group->group_ip == 0);
      FP("%*sn0: %lu in %d place%s below massif's threshold (%.2f%%)\n",
         indent+1, "", group->total, group->n_ec, s, sig_pct_threshold);
      return;
   }

   // Normal group => output the group and its subgroups.
   // First build the subgroups, one level deeper in the stacktraces.
   ms_make_groups(depth+1, group->ms_ec, group->n_ec, sig_sz,
                  &n_groups, &groups);

   // FIXME JRS EPOCH 28 July 2017: HACK!  Is this correct?
   const DiEpoch cur_ep = VG_(current_DiEpoch)();
   // // FIXME PW EPOCH : No, the above is not correct.
   // Xtree Massif output regroups execontext in the layout of a 'tree'.
   // So, possibly, the same IP address value can be in 2 different ec, but
   // the epoch to symbolise this address must be retrieved from the ec it
   // originates from.
   // So, to fix this, it is not enough to make a group based on identical
   // IP addr value, one must also find the di used to symbolise this address,
   // A group will then be defined as 'same IP and same di'.
   // Fix not trivial to do, so for the moment, --keep-debuginfo=yes will
   // have no impact on xtree massif output.

   Addr cur_ip = group->ms_ec->ips[depth];

   InlIPCursor *iipc = VG_(new_IIPC)(cur_ep, cur_ip);

   // Output one line per (inlined) frame at this IP.  Each inlined call
   // is printed at the same depth but one extra indent level.
   while (True) {
      const HChar* buf = VG_(describe_IP)(cur_ep, cur_ip, iipc);
      Bool is_inlined = VG_(next_IIPC)(iipc);

      // Note: group->total is unsigned; use %lu, consistently with the
      // insignificant-group branch above (was %ld, a signed/unsigned
      // format mismatch).
      FP("%*s" "n%u: %lu %s\n",
         indent + 1, "",
         is_inlined ? 1 : n_groups, // Inlined frames always have one child.
         group->total,
         buf);

      if (!is_inlined) {
         break;
      }

      indent++;
   }

   VG_(delete_IIPC)(iipc);

   /* Output sub groups of this group. */
   for (i = 0; i < n_groups; i++)
      ms_output_group(fp, depth+1, indent+1, &groups[i], sig_sz,
                      sig_pct_threshold);

   VG_(free)(groups);
}
816 
817 /* Allocate and build an array of Ms_Ec sorted by addresses in the
818    Ms_Ec StackTrace. */
prepare_ms_ec(XTree * xt,ULong (* report_value)(const void * value),ULong * top_total,Ms_Ec ** vms_ec,UInt * vn_ec)819 static void prepare_ms_ec (XTree* xt,
820                            ULong (*report_value)(const void* value),
821                            ULong* top_total, Ms_Ec** vms_ec, UInt* vn_ec)
822 {
823    XT_shared* shared = xt->shared;
824    const UInt n_xecu = VG_(sizeXA)(shared->xec);
825    const UInt n_data_xecu = VG_(sizeXA)(xt->data);
826    Ms_Ec* ms_ec = VG_(malloc)("XT_massif_print.ms_ec", n_xecu * sizeof(Ms_Ec));
827    UInt n_xecu_sel = 0; // Nr of xecu that are selected for output.
828 
829    vg_assert(n_data_xecu <= n_xecu);
830 
831    // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace.
832    ensure_ips_order_xecu_valid(shared);
833 
834    *top_total = 0;
835    DMSG(1, "iteration %u\n", n_xecu);
836    for (UInt i = 0; i < n_xecu; i++) {
837       Xecu xecu = *(Xecu*)VG_(indexXA)(shared->ips_order_xecu, i);
838       xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
839 
840       if (xecu >= n_data_xecu)
841          continue; // No data for this xecu in xt->data.
842       ms_ec[n_xecu_sel].n_ips = xe->n_ips_sel;
843       if (ms_ec[n_xecu_sel].n_ips == 0)
844          continue;
845 
846       ms_ec[n_xecu_sel].ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
847       ms_ec[n_xecu_sel].report_value
848          = (*report_value)(VG_(indexXA)(xt->data, xecu));
849       *top_total += ms_ec[n_xecu_sel].report_value;
850 
851       n_xecu_sel++;
852    }
853    vg_assert(n_xecu_sel <= n_xecu);
854 
855    *vms_ec = ms_ec;
856    *vn_ec = n_xecu_sel;
857 }
858 
VG_(XT_massif_open)859 MsFile* VG_(XT_massif_open)
860      (const HChar* outfilename,
861       const HChar* desc,
862       const XArray* desc_args,
863       const HChar* time_unit)
864 {
865    UInt i;
866    VgFile* fp = xt_open(outfilename);
867 
868    if (fp == NULL)
869       return NULL; // xt_open reported the error.
870 
871    /* ------ file header ------------------------------- */
872    FP("desc:");
873    if (desc)
874       FP(" %s", desc);
875    i = 0;
876    if (desc_args) {
877       for (i = 0; i < VG_(sizeXA)(desc_args); i++) {
878          HChar* arg = *(HChar**)VG_(indexXA)(desc_args, i);
879          FP(" %s", arg);
880       }
881    }
882    if (0 == i && desc == NULL) FP(" (none)");
883    FP("\n");
884 
885    FP_cmd(fp);
886 
887    FP("time_unit: %s\n", time_unit);
888 
889    return fp;
890 }
891 
VG_(XT_massif_close)892 void VG_(XT_massif_close)(MsFile* fp)
893 {
894    if (fp == NULL)
895       return; // Error should have been reported by  VG_(XT_massif_open)
896 
897    VG_(fclose)(fp);
898 }
899 
/* Print one massif snapshot to fp: the snapshot header lines, then,
   for a detailed snapshot, the heap tree built from xt's stacktraces.
   report_value extracts the ULong to report from an xtree data element. */
void VG_(XT_massif_print)
     (MsFile* fp,
      XTree* xt,
      const Massif_Header* header,
      ULong (*report_value)(const void* value))
{
   UInt i;

   if (fp == NULL)
      return; // Normally  VG_(XT_massif_open) already reported an error.

   /* Compute/prepare Snapshot totals/data/... */
   ULong top_total;

   /* Following variables only used for detailed snapshot. */
   UInt n_ec = 0;
   Ms_Ec* ms_ec = NULL;
   const HChar* kind =
      header->detailed ? (header->peak ? "peak" : "detailed") : "empty";

   DMSG(1, "XT_massif_print %s\n", kind);
   // top_total is computed one of three ways, depending on what we have:
   if (header->detailed) {
      /* Prepare the Ms_Ec sorted array of stacktraces and the groups
         at level 0.  prepare_ms_ec also sums the report values into
         top_total. */
      prepare_ms_ec(xt, report_value, &top_total, &ms_ec, &n_ec);
      DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec);
   } else if (xt == NULL) {
      /* Non detailed, no xt => use the sz provided in the header. */
      top_total = header->sz_B;
   } else {
      /* For non detailed snapshot, compute total directly from the xec. */
      const XT_shared* shared = xt->shared;
      const UInt n_xecu = VG_(sizeXA)(xt->data);
      top_total = 0;

      for (UInt xecu = 0; xecu < n_xecu; xecu++) {
         xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
         // Skip xecu whose selected stacktrace is empty.
         if (xe->n_ips_sel == 0)
            continue;
         top_total += (*report_value)(VG_(indexXA)(xt->data, xecu));
      }
   }

   /* ------ snapshot header --------------------------- */
   FP("#-----------\n");
   FP("snapshot=%d\n", header->snapshot_n);
   FP("#-----------\n");
   FP("time=%lld\n", header->time);

   FP("mem_heap_B=%llu\n", top_total); // without extra_B and without stacks_B
   FP("mem_heap_extra_B=%llu\n", header->extra_B);
   FP("mem_stacks_B=%llu\n", header->stacks_B);
   FP("heap_tree=%s\n", kind);

   /* ------ detailed snapshot data ----------------------------- */
   if (header->detailed) {
      UInt n_groups;
      Ms_Group* groups;

      ULong sig_sz;
      // Work out how big a child must be to be significant.  If the current
      // top_total is zero, then we set it to 1, which means everything will be
      // judged insignificant -- this is sensible, as there's no point showing
      // any detail for this case.  Unless they used threshold=0, in which
      // case we show them everything because that's what they asked for.
      //
      // Nb: We do this once now, rather than once per child, because if we do
      // that the cost of all the divisions adds up to something significant.
      if (0 == top_total && 0 != header->sig_threshold)
         sig_sz = 1;
      else
         sig_sz = ((top_total + header->extra_B + header->stacks_B)
                   * header->sig_threshold) / 100;

      /* Produce the groups at depth 0 */
      DMSG(1, "XT_massif_print producing depth 0 groups\n");
      ms_make_groups(0, ms_ec, n_ec, sig_sz, &n_groups, &groups);

      /* Output the top node. */
      FP("n%u: %llu %s\n", n_groups, top_total, header->top_node_desc);

      /* Output depth 0 groups.  ms_output_group recurses for deeper
         levels. */
      DMSG(1, "XT_massif_print outputing %u depth 0 groups\n", n_groups);
      for (i = 0; i < n_groups; i++)
         ms_output_group(fp, 0, 0, &groups[i], sig_sz, header->sig_threshold);

      VG_(free)(groups);
      VG_(free)(ms_ec);
   }
}
990 
VG_(XT_offset_main_or_below_main)991 Int VG_(XT_offset_main_or_below_main)(DiEpoch ep, Addr* ips, Int n_ips)
992 {
993    /* Search for main or below main function.
994       To limit the nr of ips to examine, we maintain the deepest
995       offset where main was found, and we first search main
996       from there.
997       If no main is found, we will then do a search for main or
998       below main function till the top. */
999    static Int deepest_main = 0;
1000    Vg_FnNameKind kind = Vg_FnNameNormal;
1001    Int mbm = n_ips - 1; // Position of deepest main or below main.
1002    Vg_FnNameKind mbmkind = Vg_FnNameNormal;
1003    Int i;
1004 
1005    for (i = n_ips - 1 - deepest_main;
1006         i < n_ips;
1007         i++) {
1008       mbmkind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
1009       if (mbmkind != Vg_FnNameNormal) {
1010          mbm = i;
1011          break;
1012       }
1013    }
1014 
1015    /* Search for main or below main function till top. */
1016    for (i = mbm - 1;
1017         i >= 0 && mbmkind != Vg_FnNameMain;
1018         i--) {
1019       kind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
1020       if (kind != Vg_FnNameNormal) {
1021          mbm = i;
1022          mbmkind = kind;
1023       }
1024    }
1025    if (Vg_FnNameMain == mbmkind || Vg_FnNameBelowMain == mbmkind) {
1026       if (mbmkind == Vg_FnNameMain && (n_ips - 1 - mbm) > deepest_main)
1027          deepest_main = n_ips - 1 - mbm;
1028       return mbm;
1029    } else
1030       return n_ips-1;
1031 }
1032 
VG_(XT_filter_1top_and_maybe_below_main)1033 void VG_(XT_filter_1top_and_maybe_below_main)
1034      (Addr* ips, Int n_ips,
1035       UInt* top, UInt* n_ips_sel)
1036 {
1037    Int mbm;
1038 
1039    *n_ips_sel = n_ips;
1040    if (n_ips == 0) {
1041       *top = 0;
1042       return;
1043    }
1044 
1045    /* Filter top function. */
1046    *top = 1;
1047 
1048    if (VG_(clo_show_below_main))
1049       mbm = n_ips - 1;
1050    else {
1051       // FIXME PW EPOCH : use the real ips epoch
1052       const DiEpoch cur_ep = VG_(current_DiEpoch)();
1053       mbm = VG_(XT_offset_main_or_below_main)(cur_ep, ips, n_ips);
1054    }
1055 
1056    *n_ips_sel = mbm - *top + 1;
1057 }
1058 
VG_(XT_filter_maybe_below_main)1059 void VG_(XT_filter_maybe_below_main)
1060      (Addr* ips, Int n_ips,
1061       UInt* top, UInt* n_ips_sel)
1062 {
1063    Int mbm;
1064 
1065    *n_ips_sel = n_ips;
1066    *top = 0;
1067    if (n_ips == 0)
1068       return;
1069 
1070    if (VG_(clo_show_below_main))
1071       mbm = n_ips - 1;
1072    else {
1073       // FIXME PW EPOCH : use the real ips epoch
1074       const DiEpoch cur_ep = VG_(current_DiEpoch)();
1075       mbm = VG_(XT_offset_main_or_below_main)(cur_ep, ips, n_ips);
1076    }
1077    *n_ips_sel = mbm - *top + 1;
1078 }
1079 
1080 /*--------------------------------------------------------------------*/
1081 /*--- end                                                m_xtree.c ---*/
1082 /*--------------------------------------------------------------------*/
1083