1 
2 /*--------------------------------------------------------------------*/
3 /*--- Cachegrind: everything but the simulation itself.            ---*/
4 /*---                                                    cg_main.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Cachegrind, a Valgrind tool for cache
9    profiling programs.
10 
11    Copyright (C) 2002-2017 Nicholas Nethercote
12       njn@valgrind.org
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #include "pub_tool_basics.h"
33 #include "pub_tool_debuginfo.h"
34 #include "pub_tool_libcbase.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcfile.h"
37 #include "pub_tool_libcprint.h"
38 #include "pub_tool_libcproc.h"
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_oset.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_xarray.h"
44 #include "pub_tool_clientstate.h"
45 #include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
46 
47 #include "cg_arch.h"
48 #include "cg_sim.c"
49 #include "cg_branchpred.c"
50 
51 /*------------------------------------------------------------*/
52 /*--- Constants                                            ---*/
53 /*------------------------------------------------------------*/
54 
55 /* Set to 1 for very verbose debugging */
56 #define DEBUG_CG 0
57 
58 /*------------------------------------------------------------*/
59 /*--- Options                                              ---*/
60 /*------------------------------------------------------------*/
61 
static Bool  clo_cache_sim  = True;  /* do cache simulation? */
static Bool  clo_branch_sim = False; /* do branch simulation? */
/* Output file name template; "%p" is presumably expanded to the process
   id by the output-file machinery -- TODO confirm against the dumper. */
static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";

/*------------------------------------------------------------*/
/*--- Cachesim configuration                               ---*/
/*------------------------------------------------------------*/

static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
71 
72 /*------------------------------------------------------------*/
73 /*--- Types and Data Structures                            ---*/
74 /*------------------------------------------------------------*/
75 
/* Counts for one kind of memory access: total accesses, first-level
   cache misses, last-level (LL) cache misses. */
typedef
   struct {
      ULong a;  /* total # memory accesses of this kind */
      ULong m1; /* misses in the first level cache */
      ULong mL; /* misses in the second level cache */
   }
   CacheCC;

/* Counts for one kind of branch: total executed, mispredicted. */
typedef
   struct {
      ULong b;  /* total # branches of this kind */
      ULong mp; /* number of branches mispredicted */
   }
   BranchCC;

//------------------------------------------------------------
// Primary data structure #1: CC table
// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
// - an ordered set of CCs.  CC indexing done by file/function/line (as
//   determined from the instrAddr).
// - Traversed for dumping stats at end in file/func/line hierarchy.

/* A source location.  file and fn point at interned strings from the
   string table below, so pointer equality implies string equality. */
typedef struct {
   HChar* file;
   const HChar* fn;
   Int    line;
}
CodeLoc;

/* Per-source-line cost centre: one CacheCC per access kind plus the two
   branch-prediction counters. */
typedef struct {
   CodeLoc  loc; /* Source location that these counts pertain to */
   CacheCC  Ir;  /* Insn read counts */
   CacheCC  Dr;  /* Data read counts */
   CacheCC  Dw;  /* Data write/modify counts */
   BranchCC Bc;  /* Conditional branch counts */
   BranchCC Bi;  /* Indirect branch counts */
} LineCC;
113 
114 // First compare file, then fn, then line.
cmp_CodeLoc_LineCC(const void * vloc,const void * vcc)115 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
116 {
117    Word res;
118    const CodeLoc* a = (const CodeLoc*)vloc;
119    const CodeLoc* b = &(((const LineCC*)vcc)->loc);
120 
121    res = VG_(strcmp)(a->file, b->file);
122    if (0 != res)
123       return res;
124 
125    res = VG_(strcmp)(a->fn, b->fn);
126    if (0 != res)
127       return res;
128 
129    return a->line - b->line;
130 }
131 
/* The CC table itself: an ordered set of LineCC nodes (see
   cmp_CodeLoc_LineCC for the ordering). */
static OSet* CC_table;

//------------------------------------------------------------
// Primary data structure #2: InstrInfo table
// - Holds the cached info about each instr that is used for simulation.
// - table(SB_start_addr, list(InstrInfo))
// - For each SB, each InstrInfo in the list holds info about the
//   instruction (instrLen, instrAddr, etc), plus a pointer to its line
//   CC.  This node is what's passed to the simulation function.
// - When SBs are discarded the relevant list(instr_details) is freed.

typedef struct _InstrInfo InstrInfo;
struct _InstrInfo {
   Addr    instr_addr;
   UChar   instr_len;
   LineCC* parent;         // parent line-CC
};

typedef struct _SB_info SB_info;
struct _SB_info {
   Addr      SB_addr;      // key;  MUST BE FIRST
   Int       n_instrs;
   InstrInfo instrs[0];    // variable-length tail (pre-C99 flexible array)
};

/* Ordered set of SB_info, keyed on SB_addr. */
static OSet* instrInfoTable;
158 
//------------------------------------------------------------
// Secondary data structure: string table
// - holds strings, avoiding dups
// - used for filenames and function names, each of which will be
//   pointed to by one or more CCs.
// - it also allows equality checks just by pointer comparison, which
//   is good when printing the output file at the end.

static OSet* stringTable;

//------------------------------------------------------------
// Stats
static Int  distinct_files      = 0;
static Int  distinct_fns        = 0;
static Int  distinct_lines      = 0;
static Int  distinct_instrsGen  = 0;  // generic Ir instrs (may cross lines)
static Int  distinct_instrsNoX  = 0;  // Ir instrs known not to cross lines

// Debug-info lookup outcomes, bumped in get_debug_info():
static Int  full_debugs         = 0;  // file/line and fn both found
static Int  file_line_debugs    = 0;  // file/line found, fn missing
static Int  fn_debugs           = 0;  // fn found, file/line missing
static Int  no_debugs           = 0;  // no debug info at all
181 
182 /*------------------------------------------------------------*/
183 /*--- String table operations                              ---*/
184 /*------------------------------------------------------------*/
185 
stringCmp(const void * key,const void * elem)186 static Word stringCmp( const void* key, const void* elem )
187 {
188    return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
189 }
190 
191 // Get a permanent string;  either pull it out of the string table if it's
192 // been encountered before, or dup it and put it into the string table.
get_perm_string(const HChar * s)193 static HChar* get_perm_string(const HChar* s)
194 {
195    HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
196    if (s_ptr) {
197       return *s_ptr;
198    } else {
199       HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
200       *s_node = VG_(strdup)("cg.main.gps.1", s);
201       VG_(OSetGen_Insert)(stringTable, s_node);
202       return *s_node;
203    }
204 }
205 
206 /*------------------------------------------------------------*/
207 /*--- CC table operations                                  ---*/
208 /*------------------------------------------------------------*/
209 
// Look up directory/file/function/line for instr_addr.  Missing debug
// info is replaced by "???" (and line 0).  Also bumps the debug-info
// outcome counters used in the final stats.
static void get_debug_info(Addr instr_addr, const HChar **dir,
                           const HChar **file, const HChar **fn, UInt* line)
{
   DiEpoch ep = VG_(current_DiEpoch)();
   Bool have_file_line =
      VG_(get_filename_linenum)(ep, instr_addr, file, dir, line);
   Bool have_fn = VG_(get_fnname)(ep, instr_addr, fn);

   if (!have_file_line) {
      *file = "???";
      *line = 0;
   }
   if (!have_fn) {
      *fn = "???";
   }

   // Classify the lookup outcome for the stats.
   if (have_file_line && have_fn)
      full_debugs++;
   else if (have_file_line)
      file_line_debugs++;
   else if (have_fn)
      fn_debugs++;
   else
      no_debugs++;
}
238 
239 // Do a three step traversal: by file, then fn, then line.
240 // Returns a pointer to the line CC, creates a new one if necessary.
get_lineCC(Addr origAddr)241 static LineCC* get_lineCC(Addr origAddr)
242 {
243    const HChar *fn, *file, *dir;
244    UInt    line;
245    CodeLoc loc;
246    LineCC* lineCC;
247 
248    get_debug_info(origAddr, &dir, &file, &fn, &line);
249 
250    // Form an absolute pathname if a directory is available
251    HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];
252 
253    if (dir[0]) {
254       VG_(sprintf)(absfile, "%s/%s", dir, file);
255    } else {
256       VG_(sprintf)(absfile, "%s", file);
257    }
258 
259    loc.file = absfile;
260    loc.fn   = fn;
261    loc.line = line;
262 
263    lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
264    if (!lineCC) {
265       // Allocate and zero a new node.
266       lineCC           = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
267       lineCC->loc.file = get_perm_string(loc.file);
268       lineCC->loc.fn   = get_perm_string(loc.fn);
269       lineCC->loc.line = loc.line;
270       lineCC->Ir.a     = 0;
271       lineCC->Ir.m1    = 0;
272       lineCC->Ir.mL    = 0;
273       lineCC->Dr.a     = 0;
274       lineCC->Dr.m1    = 0;
275       lineCC->Dr.mL    = 0;
276       lineCC->Dw.a     = 0;
277       lineCC->Dw.m1    = 0;
278       lineCC->Dw.mL    = 0;
279       lineCC->Bc.b     = 0;
280       lineCC->Bc.mp    = 0;
281       lineCC->Bi.b     = 0;
282       lineCC->Bi.mp    = 0;
283       VG_(OSetGen_Insert)(CC_table, lineCC);
284    }
285 
286    return lineCC;
287 }
288 
289 /*------------------------------------------------------------*/
290 /*--- Cache simulation functions                           ---*/
291 /*------------------------------------------------------------*/
292 
293 /* A common case for an instruction read event is that the
294  * bytes read belong to the same cache line in both L1I and LL
295  * (if cache line sizes of L1 and LL are the same).
296  * As this can be detected at instrumentation time, and results
 * in faster simulation, special-casing is beneficial.
298  *
299  * Abbreviations used in var/function names:
300  *  IrNoX - instruction read does not cross cache lines
301  *  IrGen - generic instruction read; not detected as IrNoX
302  *  Ir    - not known / not important whether it is an IrNoX
303  */
304 
305 // Only used with --cache-sim=no.
306 static VG_REGPARM(1)
log_1Ir(InstrInfo * n)307 void log_1Ir(InstrInfo* n)
308 {
309    n->parent->Ir.a++;
310 }
311 
312 // Only used with --cache-sim=no.
313 static VG_REGPARM(2)
log_2Ir(InstrInfo * n,InstrInfo * n2)314 void log_2Ir(InstrInfo* n, InstrInfo* n2)
315 {
316    n->parent->Ir.a++;
317    n2->parent->Ir.a++;
318 }
319 
320 // Only used with --cache-sim=no.
321 static VG_REGPARM(3)
log_3Ir(InstrInfo * n,InstrInfo * n2,InstrInfo * n3)322 void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
323 {
324    n->parent->Ir.a++;
325    n2->parent->Ir.a++;
326    n3->parent->Ir.a++;
327 }
328 
329 // Generic case for instruction reads: may cross cache lines.
330 // All other Ir handlers expect IrNoX instruction reads.
331 static VG_REGPARM(1)
log_1IrGen_0D_cache_access(InstrInfo * n)332 void log_1IrGen_0D_cache_access(InstrInfo* n)
333 {
334    //VG_(printf)("1IrGen_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
335    //             n, n->instr_addr, n->instr_len);
336    cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
337 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
338    n->parent->Ir.a++;
339 }
340 
341 static VG_REGPARM(1)
log_1IrNoX_0D_cache_access(InstrInfo * n)342 void log_1IrNoX_0D_cache_access(InstrInfo* n)
343 {
344    //VG_(printf)("1IrNoX_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
345    //             n, n->instr_addr, n->instr_len);
346    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
347 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
348    n->parent->Ir.a++;
349 }
350 
351 static VG_REGPARM(2)
log_2IrNoX_0D_cache_access(InstrInfo * n,InstrInfo * n2)352 void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
353 {
354    //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
355    //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
356    //            n,  n->instr_addr,  n->instr_len,
357    //            n2, n2->instr_addr, n2->instr_len);
358    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
359 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
360    n->parent->Ir.a++;
361    cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
362 			 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
363    n2->parent->Ir.a++;
364 }
365 
366 static VG_REGPARM(3)
log_3IrNoX_0D_cache_access(InstrInfo * n,InstrInfo * n2,InstrInfo * n3)367 void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
368 {
369    //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
370    //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
371    //            "            CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
372    //            n,  n->instr_addr,  n->instr_len,
373    //            n2, n2->instr_addr, n2->instr_len,
374    //            n3, n3->instr_addr, n3->instr_len);
375    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
376 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
377    n->parent->Ir.a++;
378    cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
379 			 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
380    n2->parent->Ir.a++;
381    cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
382 			 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
383    n3->parent->Ir.a++;
384 }
385 
386 static VG_REGPARM(3)
log_1IrNoX_1Dr_cache_access(InstrInfo * n,Addr data_addr,Word data_size)387 void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
388 {
389    //VG_(printf)("1IrNoX_1Dr:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
390    //            "                               daddr=0x%010lx,  dsize=%lu\n",
391    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
392    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
393 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
394    n->parent->Ir.a++;
395 
396    cachesim_D1_doref(data_addr, data_size,
397                      &n->parent->Dr.m1, &n->parent->Dr.mL);
398    n->parent->Dr.a++;
399 }
400 
401 static VG_REGPARM(3)
log_1IrNoX_1Dw_cache_access(InstrInfo * n,Addr data_addr,Word data_size)402 void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
403 {
404    //VG_(printf)("1IrNoX_1Dw:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
405    //            "                               daddr=0x%010lx,  dsize=%lu\n",
406    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
407    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
408 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
409    n->parent->Ir.a++;
410 
411    cachesim_D1_doref(data_addr, data_size,
412                      &n->parent->Dw.m1, &n->parent->Dw.mL);
413    n->parent->Dw.a++;
414 }
415 
416 /* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
417    and log_0Ir_1Dw_cache_access have exactly the same prototype.  If
418    you change them, you must change addEvent_D_guarded too. */
419 static VG_REGPARM(3)
log_0Ir_1Dr_cache_access(InstrInfo * n,Addr data_addr,Word data_size)420 void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
421 {
422    //VG_(printf)("0Ir_1Dr:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
423    //            n, data_addr, data_size);
424    cachesim_D1_doref(data_addr, data_size,
425                      &n->parent->Dr.m1, &n->parent->Dr.mL);
426    n->parent->Dr.a++;
427 }
428 
429 /* See comment on log_0Ir_1Dr_cache_access. */
430 static VG_REGPARM(3)
log_0Ir_1Dw_cache_access(InstrInfo * n,Addr data_addr,Word data_size)431 void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
432 {
433    //VG_(printf)("0Ir_1Dw:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
434    //            n, data_addr, data_size);
435    cachesim_D1_doref(data_addr, data_size,
436                      &n->parent->Dw.m1, &n->parent->Dw.mL);
437    n->parent->Dw.a++;
438 }
439 
440 /* For branches, we consult two different predictors, one which
441    predicts taken/untaken for conditional branches, and the other
442    which predicts the branch target address for indirect branches
443    (jump-to-register style ones). */
444 
445 static VG_REGPARM(2)
log_cond_branch(InstrInfo * n,Word taken)446 void log_cond_branch(InstrInfo* n, Word taken)
447 {
448    //VG_(printf)("cbrnch:  CCaddr=0x%010lx,  taken=0x%010lx\n",
449    //             n, taken);
450    n->parent->Bc.b++;
451    n->parent->Bc.mp
452       += (1 & do_cond_branch_predict(n->instr_addr, taken));
453 }
454 
455 static VG_REGPARM(2)
log_ind_branch(InstrInfo * n,UWord actual_dst)456 void log_ind_branch(InstrInfo* n, UWord actual_dst)
457 {
458    //VG_(printf)("ibrnch:  CCaddr=0x%010lx,    dst=0x%010lx\n",
459    //             n, actual_dst);
460    n->parent->Bi.b++;
461    n->parent->Bi.mp
462       += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
463 }
464 
465 
466 /*------------------------------------------------------------*/
467 /*--- Instrumentation types and structures                 ---*/
468 /*------------------------------------------------------------*/
469 
470 /* Maintain an ordered list of memory events which are outstanding, in
471    the sense that no IR has yet been generated to do the relevant
472    helper calls.  The BB is scanned top to bottom and memory events
473    are added to the end of the list, merging with the most recent
474    notified event where possible (Dw immediately following Dr and
475    having the same size and EA can be merged).
476 
477    This merging is done so that for architectures which have
478    load-op-store instructions (x86, amd64), the insn is treated as if
479    it makes just one memory reference (a modify), rather than two (a
480    read followed by a write at the same address).
481 
482    At various points the list will need to be flushed, that is, IR
483    generated from it.  That must happen before any possible exit from
484    the block (the end, or an IRStmt_Exit).  Flushing also takes place
485    when there is no space to add a new event.
486 
487    If we require the simulation statistics to be up to date with
488    respect to possible memory exceptions, then the list would have to
489    be flushed before each memory reference.  That would however lose
490    performance by inhibiting event-merging during flushing.
491 
492    Flushing the list consists of walking it start to end and emitting
493    instrumentation IR for each event, in the order in which they
494    appear.  It may be possible to emit a single call for two adjacent
495    events in order to reduce the number of helper function calls made.
496    For example, it could well be profitable to handle two adjacent Ir
497    events with a single helper call.  */
498 
/* Local alias: an IRAtom is an IRExpr. */
typedef
   IRExpr
   IRAtom;

/* The kinds of outstanding event. */
typedef
   enum {
      Ev_IrNoX,  // Instruction read not crossing cache lines
      Ev_IrGen,  // Generic Ir, not being detected as IrNoX
      Ev_Dr,     // Data read
      Ev_Dw,     // Data write
      Ev_Dm,     // Data modify (read then write)
      Ev_Bc,     // branch conditional
      Ev_Bi      // branch indirect (to unknown destination)
   }
   EventTag;

/* One outstanding event: its tag, the InstrInfo of the instruction it
   belongs to, and a tag-specific payload (effective address and size
   for data events; guard/destination expressions for branches). */
typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;
      union {
         struct {
         } IrGen;               // no payload
         struct {
         } IrNoX;               // no payload
         struct {
            IRAtom* ea;         // effective address expression
            Int     szB;        // access size in bytes
         } Dr;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dw;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dm;
         struct {
            IRAtom* taken; /* :: Ity_I1 */
         } Bc;
         struct {
            IRAtom* dst;        // branch destination expression
         } Bi;
      } Ev;
   }
   Event;
545 
/* Zero every byte of *ev (tag, inode and payload union alike). */
static void init_Event ( Event* ev ) {
   VG_(memset)(ev, 0, sizeof(*ev));
}
549 
get_Event_dea(Event * ev)550 static IRAtom* get_Event_dea ( Event* ev ) {
551    switch (ev->tag) {
552       case Ev_Dr: return ev->Ev.Dr.ea;
553       case Ev_Dw: return ev->Ev.Dw.ea;
554       case Ev_Dm: return ev->Ev.Dm.ea;
555       default:    tl_assert(0);
556    }
557 }
558 
get_Event_dszB(Event * ev)559 static Int get_Event_dszB ( Event* ev ) {
560    switch (ev->tag) {
561       case Ev_Dr: return ev->Ev.Dr.szB;
562       case Ev_Dw: return ev->Ev.Dw.szB;
563       case Ev_Dm: return ev->Ev.Dm.szB;
564       default:    tl_assert(0);
565    }
566 }
567 
568 
/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16


/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef
   struct {
      /* The current outstanding-memory-event list. */
      Event events[N_EVENTS];
      Int   events_used;   /* number of live entries in events[] */

      /* The array of InstrInfo bins for the BB. */
      SB_info* sbInfo;

      /* Number InstrInfo bins 'used' so far. */
      Int sbInfo_i;

      /* The output SB being constructed. */
      IRSB* sbOut;
   }
   CgState;
594 
595 
596 /*------------------------------------------------------------*/
597 /*--- Instrumentation main                                 ---*/
598 /*------------------------------------------------------------*/
599 
600 // Note that origAddr is the real origAddr, not the address of the first
601 // instruction in the block (they can be different due to redirection).
602 static
get_SB_info(IRSB * sbIn,Addr origAddr)603 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
604 {
605    Int      i, n_instrs;
606    IRStmt*  st;
607    SB_info* sbInfo;
608 
609    // Count number of original instrs in SB
610    n_instrs = 0;
611    for (i = 0; i < sbIn->stmts_used; i++) {
612       st = sbIn->stmts[i];
613       if (Ist_IMark == st->tag) n_instrs++;
614    }
615 
616    // Check that we don't have an entry for this BB in the instr-info table.
617    // If this assertion fails, there has been some screwup:  some
618    // translations must have been discarded but Cachegrind hasn't discarded
619    // the corresponding entries in the instr-info table.
620    sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
621    tl_assert(NULL == sbInfo);
622 
623    // BB never translated before (at this address, at least;  could have
624    // been unloaded and then reloaded elsewhere in memory)
625    sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
626                                 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
627    sbInfo->SB_addr  = origAddr;
628    sbInfo->n_instrs = n_instrs;
629    VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
630 
631    return sbInfo;
632 }
633 
634 
/* Pretty-print one pending event; used for DEBUG_CG tracing. */
static void showEvent ( Event* ev )
{
   switch (ev->tag) {
      case Ev_IrGen:
         VG_(printf)("IrGen %p\n", ev->inode);
         break;
      case Ev_IrNoX:
         VG_(printf)("IrNoX %p\n", ev->inode);
         break;
      case Ev_Dr:
         VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
         ppIRExpr(ev->Ev.Dr.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dw:
         VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
         ppIRExpr(ev->Ev.Dw.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dm:
         VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
         ppIRExpr(ev->Ev.Dm.ea);
         VG_(printf)("\n");
         break;
      case Ev_Bc:
         VG_(printf)("Bc %p   GA=", ev->inode);
         ppIRExpr(ev->Ev.Bc.taken);
         VG_(printf)("\n");
         break;
      case Ev_Bi:
         VG_(printf)("Bi %p  DST=", ev->inode);
         ppIRExpr(ev->Ev.Bi.dst);
         VG_(printf)("\n");
         break;
      default:
         tl_assert(0);
         break;
   }
}
674 
675 // Reserve and initialise an InstrInfo for the first mention of a new insn.
676 static
setup_InstrInfo(CgState * cgs,Addr instr_addr,UInt instr_len)677 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
678 {
679    InstrInfo* i_node;
680    tl_assert(cgs->sbInfo_i >= 0);
681    tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
682    i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
683    i_node->instr_addr = instr_addr;
684    i_node->instr_len  = instr_len;
685    i_node->parent     = get_lineCC(instr_addr);
686    cgs->sbInfo_i++;
687    return i_node;
688 }
689 
690 
/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->sbOut, and this activity
   'consumes' slots in cgs->sbInfo. */

static void flushEvents ( CgState* cgs )
{
   Int        i, regparms;
   const HChar* helperName;
   void*      helperAddr;
   IRExpr**   argv;
   IRExpr*    i_node_expr;
   IRDirty*   di;
   Event*     ev;
   Event*     ev2;
   Event*     ev3;

   i = 0;
   while (i < cgs->events_used) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < cgs->events_used);

      /* ev2/ev3 are one- and two-ahead lookaheads (NULL at list end),
         used to merge adjacent events into a single helper call. */
      ev  = &cgs->events[i];
      ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
      ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );

      if (DEBUG_CG) {
         VG_(printf)("   flush ");
         showEvent( ev );
      }

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
         i appropriately. */
      switch (ev->tag) {
         case Ev_IrNoX:
            /* Merge an IrNoX with a following Dr/Dm. */
            if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
               /* Why is this true?  It's because we're merging an Ir
                  with a following Dr or Dm.  The Ir derives from the
                  instruction's IMark and the Dr/Dm from data
                  references which follow it.  In short it holds
                  because each insn starts with an IMark, hence an
                  Ev_Ir, and so these Dr/Dm must pertain to the
                  immediately preceding Ir.  Same applies to analogous
                  assertions in the subsequent cases. */
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dr_cache_access";
               helperAddr = &log_1IrNoX_1Dr_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with a following Dw. */
            else
            if (ev2 && ev2->tag == Ev_Dw) {
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dw_cache_access";
               helperAddr = &log_1IrNoX_1Dw_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with two following IrNoX's. */
            else
            if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
            {
               if (clo_cache_sim) {
                  helperName = "log_3IrNoX_0D_cache_access";
                  helperAddr = &log_3IrNoX_0D_cache_access;
               } else {
                  helperName = "log_3Ir";
                  helperAddr = &log_3Ir;
               }
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ),
                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
               regparms = 3;
               i += 3;
            }
            /* Merge an IrNoX with one following IrNoX. */
            else
            if (ev2 && ev2->tag == Ev_IrNoX) {
               if (clo_cache_sim) {
                  helperName = "log_2IrNoX_0D_cache_access";
                  helperAddr = &log_2IrNoX_0D_cache_access;
               } else {
                  helperName = "log_2Ir";
                  helperAddr = &log_2Ir;
               }
               argv = mkIRExprVec_2( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
               regparms = 2;
               i += 2;
            }
            /* No merging possible; emit as-is. */
            else {
               if (clo_cache_sim) {
                  helperName = "log_1IrNoX_0D_cache_access";
                  helperAddr = &log_1IrNoX_0D_cache_access;
               } else {
                  helperName = "log_1Ir";
                  helperAddr = &log_1Ir;
               }
               argv = mkIRExprVec_1( i_node_expr );
               regparms = 1;
               i++;
            }
            break;
         case Ev_IrGen:
            /* Generic (possibly line-crossing) instruction read; never
               merged with neighbours. */
            if (clo_cache_sim) {
               helperName = "log_1IrGen_0D_cache_access";
               helperAddr = &log_1IrGen_0D_cache_access;
            } else {
               helperName = "log_1Ir";
               helperAddr = &log_1Ir;
            }
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            i++;
            break;
         case Ev_Dr:
         case Ev_Dm:
            /* Data read or modify */
            helperName = "log_0Ir_1Dr_cache_access";
            helperAddr = &log_0Ir_1Dr_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Dw:
            /* Data write */
            helperName = "log_0Ir_1Dw_cache_access";
            helperAddr = &log_0Ir_1Dw_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Bc:
            /* Conditional branch */
            helperName = "log_cond_branch";
            helperAddr = &log_cond_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
            regparms = 2;
            i++;
            break;
         case Ev_Bi:
            /* Branch to an unknown destination */
            helperName = "log_ind_branch";
            helperAddr = &log_ind_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
            regparms = 2;
            i++;
            break;
         default:
            tl_assert(0);
      }

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                              argv );
      addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
   }

   cgs->events_used = 0;
}
876 
/* Queue an instruction-fetch event for 'inode', flushing the queue
   first if it is full.  The instruction is classified as IrNoX
   ("no-X", per cachesim_is_IrNoX) or IrGen, and the corresponding
   distinct-instruction counter is bumped. */
static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
{
   Event* ev;

   /* Make room for one more event. */
   if (cgs->events_used == N_EVENTS)
      flushEvents(cgs);
   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);

   ev = &cgs->events[cgs->events_used++];
   init_Event(ev);
   ev->inode = inode;

   if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
      ev->tag = Ev_IrNoX;
      distinct_instrsNoX++;
   } else {
      ev->tag = Ev_IrGen;
      distinct_instrsGen++;
   }
}
895 
896 static
addEvent_Dr(CgState * cgs,InstrInfo * inode,Int datasize,IRAtom * ea)897 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
898 {
899    Event* evt;
900    tl_assert(isIRAtom(ea));
901    tl_assert(datasize >= 1 && datasize <= min_line_size);
902    if (!clo_cache_sim)
903       return;
904    if (cgs->events_used == N_EVENTS)
905       flushEvents(cgs);
906    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
907    evt = &cgs->events[cgs->events_used];
908    init_Event(evt);
909    evt->tag       = Ev_Dr;
910    evt->inode     = inode;
911    evt->Ev.Dr.szB = datasize;
912    evt->Ev.Dr.ea  = ea;
913    cgs->events_used++;
914 }
915 
916 static
addEvent_Dw(CgState * cgs,InstrInfo * inode,Int datasize,IRAtom * ea)917 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
918 {
919    Event* evt;
920 
921    tl_assert(isIRAtom(ea));
922    tl_assert(datasize >= 1 && datasize <= min_line_size);
923 
924    if (!clo_cache_sim)
925       return;
926 
927    /* Is it possible to merge this write with the preceding read? */
928    if (cgs->events_used > 0) {
929       Event* lastEvt = &cgs->events[cgs->events_used-1];
930       if (   lastEvt->tag       == Ev_Dr
931           && lastEvt->Ev.Dr.szB == datasize
932           && lastEvt->inode     == inode
933           && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
934       {
935          lastEvt->tag   = Ev_Dm;
936          return;
937       }
938    }
939 
940    /* No.  Add as normal. */
941    if (cgs->events_used == N_EVENTS)
942       flushEvents(cgs);
943    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
944    evt = &cgs->events[cgs->events_used];
945    init_Event(evt);
946    evt->tag       = Ev_Dw;
947    evt->inode     = inode;
948    evt->Ev.Dw.szB = datasize;
949    evt->Ev.Dw.ea  = ea;
950    cgs->events_used++;
951 }
952 
953 static
addEvent_D_guarded(CgState * cgs,InstrInfo * inode,Int datasize,IRAtom * ea,IRAtom * guard,Bool isWrite)954 void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
955                           Int datasize, IRAtom* ea, IRAtom* guard,
956                           Bool isWrite )
957 {
958    tl_assert(isIRAtom(ea));
959    tl_assert(guard);
960    tl_assert(isIRAtom(guard));
961    tl_assert(datasize >= 1 && datasize <= min_line_size);
962 
963    if (!clo_cache_sim)
964       return;
965 
966    /* Adding guarded memory actions and merging them with the existing
967       queue is too complex.  Simply flush the queue and add this
968       action immediately.  Since guarded loads and stores are pretty
969       rare, this is not thought likely to cause any noticeable
970       performance loss as a result of the loss of event-merging
971       opportunities. */
972    tl_assert(cgs->events_used >= 0);
973    flushEvents(cgs);
974    tl_assert(cgs->events_used == 0);
975    /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
976    IRExpr*      i_node_expr;
977    const HChar* helperName;
978    void*        helperAddr;
979    IRExpr**     argv;
980    Int          regparms;
981    IRDirty*     di;
982    i_node_expr = mkIRExpr_HWord( (HWord)inode );
983    helperName  = isWrite ? "log_0Ir_1Dw_cache_access"
984                          : "log_0Ir_1Dr_cache_access";
985    helperAddr  = isWrite ? &log_0Ir_1Dw_cache_access
986                          : &log_0Ir_1Dr_cache_access;
987    argv        = mkIRExprVec_3( i_node_expr,
988                                 ea, mkIRExpr_HWord( datasize ) );
989    regparms    = 3;
990    di          = unsafeIRDirty_0_N(
991                     regparms,
992                     helperName, VG_(fnptr_to_fnentry)( helperAddr ),
993                     argv );
994    di->guard = guard;
995    addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
996 }
997 
998 
999 static
addEvent_Bc(CgState * cgs,InstrInfo * inode,IRAtom * guard)1000 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
1001 {
1002    Event* evt;
1003    tl_assert(isIRAtom(guard));
1004    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
1005              == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
1006    if (!clo_branch_sim)
1007       return;
1008    if (cgs->events_used == N_EVENTS)
1009       flushEvents(cgs);
1010    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1011    evt = &cgs->events[cgs->events_used];
1012    init_Event(evt);
1013    evt->tag         = Ev_Bc;
1014    evt->inode       = inode;
1015    evt->Ev.Bc.taken = guard;
1016    cgs->events_used++;
1017 }
1018 
1019 static
addEvent_Bi(CgState * cgs,InstrInfo * inode,IRAtom * whereTo)1020 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
1021 {
1022    Event* evt;
1023    tl_assert(isIRAtom(whereTo));
1024    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
1025              == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
1026    if (!clo_branch_sim)
1027       return;
1028    if (cgs->events_used == N_EVENTS)
1029       flushEvents(cgs);
1030    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1031    evt = &cgs->events[cgs->events_used];
1032    init_Event(evt);
1033    evt->tag       = Ev_Bi;
1034    evt->inode     = inode;
1035    evt->Ev.Bi.dst = whereTo;
1036    cgs->events_used++;
1037 }
1038 
1039 ////////////////////////////////////////////////////////////
1040 
1041 
/* The instrumentation pass.  Walks the statements of the incoming
   superblock 'sbIn', copying each one to a new superblock while
   queueing instruction-fetch, data-access and branch events
   (addEvent_*), which flushEvents turns into calls to the logging
   helpers.  Events are flushed eagerly at side exits and LLs, and at
   the end of the block.  Returns the instrumented superblock. */
static
IRSB* cg_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int        i;
   UInt       isize;
   IRStmt*    st;
   Addr       cia; /* address of current insn */
   CgState    cgs;
   IRTypeEnv* tyenv = sbIn->tyenv;
   InstrInfo* curr_inode = NULL;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // Set up new SB
   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and initial cia from it
   tl_assert(sbIn->stmts_used > 0);
   tl_assert(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   tl_assert(Ist_IMark == st->tag);

   cia   = st->Ist.IMark.addr;
   isize = st->Ist.IMark.len;
   // If Vex fails to decode an instruction, the size will be zero.
   // Pretend otherwise.
   if (isize == 0) isize = VG_MIN_INSTR_SZB;

   // Set up running state and get block info
   tl_assert(closure->readdr == vge->base[0]);
   cgs.events_used = 0;
   cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
   cgs.sbInfo_i    = 0;

   if (DEBUG_CG)
      VG_(printf)("\n\n---------- cg_instrument ----------\n");

   // Traverse the block, initialising inodes, adding events and flushing as
   // necessary.
   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      tl_assert(isFlatIRStmt(st));

      switch (st->tag) {
         // These statement kinds need no events; they are just copied
         // through to the output block below.
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            break;

         case Ist_IMark:
            cia   = st->Ist.IMark.addr;
            isize = st->Ist.IMark.len;

            // If Vex fails to decode an instruction, the size will be zero.
            // Pretend otherwise.
            if (isize == 0) isize = VG_MIN_INSTR_SZB;

            // Sanity-check size.
            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                     || VG_CLREQ_SZB == isize );

            // Get space for and init the inode, record it as the current one.
            // Subsequent Dr/Dw/Dm events from the same instruction will
            // also use it.
            curr_inode = setup_InstrInfo(&cgs, cia, isize);

            addEvent_Ir( &cgs, curr_inode );
            break;

         case Ist_WrTmp: {
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
                                  aexpr );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addEvent_Dw( &cgs, curr_inode,
                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
            break;
         }

         case Ist_StoreG: {
            // Guarded store: handled out-of-line by addEvent_D_guarded.
            IRStoreG* sg   = st->Ist.StoreG.details;
            IRExpr*   data = sg->data;
            IRExpr*   addr = sg->addr;
            IRType    type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, sg->guard,
                                True/*isWrite*/ );
            break;
         }

         case Ist_LoadG: {
            // Guarded load: the access size is the loaded (unwidened) type.
            IRLoadG* lg       = st->Ist.LoadG.details;
            IRType   type     = Ity_INVALID; /* loaded type */
            IRType   typeWide = Ity_INVALID; /* after implicit widening */
            IRExpr*  addr     = lg->addr;
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, lg->guard,
                                False/*!isWrite*/ );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (dataSize > min_line_size)
                  dataSize = min_line_size;
               // A Modify counts as both a read and a write.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            /* I don't think this can ever happen, but play safe. */
            if (dataSize > min_line_size)
               dataSize = min_line_size;
            addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* flush events before LL, should help SC to succeed */
               flushEvents( &cgs );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
            }
            break;
         }

         case Ist_Exit: {
            // call branch predictor only if this is a branch in guest code
            if ( (st->Ist.Exit.jk == Ijk_Boring) ||
                 (st->Ist.Exit.jk == Ijk_Call) ||
                 (st->Ist.Exit.jk == Ijk_Ret) )
            {
               /* Stuff to widen the guard expression to a host word, so
                  we can pass it to the branch predictor simulation
                  functions easily. */
               Bool     inverted;
               Addr     nia, sea;
               IRConst* dst;
               IRType   tyW    = hWordTy;
               IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
               IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
               IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
               IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                              : IRExpr_Const(IRConst_U64(1));

               /* First we need to figure out whether the side exit got
                  inverted by the ir optimiser.  To do that, figure out
                  the next (fallthrough) instruction's address and the
                  side exit address and see if they are the same. */
               nia = cia + isize;

               /* Side exit address */
               dst = st->Ist.Exit.dst;
               if (tyW == Ity_I32) {
                  tl_assert(dst->tag == Ico_U32);
                  sea = dst->Ico.U32;
               } else {
                  tl_assert(tyW == Ity_I64);
                  tl_assert(dst->tag == Ico_U64);
                  sea = dst->Ico.U64;
               }

               inverted = nia == sea;

               /* Widen the guard expression. */
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guardW,
                                            IRExpr_Unop(widen,
                                                        IRExpr_RdTmp(guard1))) );
               /* If the exit is inverted, invert the sense of the guard. */
               addStmtToIRSB(
                     cgs.sbOut,
                     IRStmt_WrTmp(
                           guard,
                           inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                                    : IRExpr_RdTmp(guardW)
                              ));
               /* And post the event. */
               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
            }

            /* We may never reach the next statement, so need to flush
               all outstanding transactions now. */
            flushEvents( &cgs );
            break;
         }

         default:
            ppIRStmt(st);
            tl_assert(0);
            break;
      }

      /* Copy the original statement */
      addStmtToIRSB( cgs.sbOut, st );

      if (DEBUG_CG) {
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &cgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstandings. */
   flushEvents( &cgs );

   /* done.  stay sane ... */
   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);

   if (DEBUG_CG) {
      VG_(printf)( "goto {");
      ppIRJumpKind(sbIn->jumpkind);
      VG_(printf)( "} ");
      ppIRExpr( sbIn->next );
      VG_(printf)( "}\n");
   }

   return cgs.sbOut;
}
1354 
1355 /*------------------------------------------------------------*/
1356 /*--- Cache configuration                                  ---*/
1357 /*------------------------------------------------------------*/
1358 
/* Command-line cache configurations for the I1, D1 and LL caches.
   UNDEFINED_CACHE until overridden — presumably via the corresponding
   --I1/--D1/--LL options (option parsing is elsewhere in this file). */
static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_LL_cache = UNDEFINED_CACHE;
1362 
1363 /*------------------------------------------------------------*/
1364 /*--- cg_fini() and related function                       ---*/
1365 /*------------------------------------------------------------*/
1366 
1367 // Total reads/writes/misses.  Calculated during CC traversal at the end.
1368 // All auto-zeroed.
static CacheCC  Ir_total;   // instruction fetches
static CacheCC  Dr_total;   // data reads
static CacheCC  Dw_total;   // data writes
static BranchCC Bc_total;   // conditional branches
static BranchCC Bi_total;   // indirect branches
1374 
/* Write the cachegrind output file: "desc:"/"cmd:"/"events:" header
   lines, then one line per LineCC in CC_table (grouped under "fl="
   and "fn=" markers), accumulating the *_total summary counters and
   the distinct_files/fns/lines statistics along the way, and finally
   the "summary:" line.  The set of event columns depends on
   clo_cache_sim / clo_branch_sim.  Gives up silently (except for a
   user message) if the output file cannot be opened. */
static void fprint_CC_table_and_calc_totals(void)
{
   Int     i;
   VgFile  *fp;
   HChar   *currFile = NULL;
   const HChar *currFn = NULL;
   LineCC* lineCC;

   // Setup output filename.  Nb: it's important to do this now, ie. as late
   // as possible.  If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the
   // parent and child will incorrectly write to the same file;  this
   // happened in 3.3.0.
   HChar* cachegrind_out_file =
      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);

   fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                                        VKI_S_IRUSR|VKI_S_IWUSR);
   if (fp == NULL) {
      // If the file can't be opened for whatever reason (conflict
      // between multiple cachegrinded processes?), give up now.
      VG_(umsg)("error: can't open cache simulation output file '%s'\n",
                cachegrind_out_file );
      VG_(umsg)("       ... so simulation results will be missing.\n");
      VG_(free)(cachegrind_out_file);
      return;
   } else {
      VG_(free)(cachegrind_out_file);
   }

   // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
   // the 2nd colon makes cg_annotate's output look nicer.
   VG_(fprintf)(fp,  "desc: I1 cache:         %s\n"
                     "desc: D1 cache:         %s\n"
                     "desc: LL cache:         %s\n",
                     I1.desc_line, D1.desc_line, LL.desc_line);

   // "cmd:" line
   VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
      VG_(fprintf)(fp, " %s", arg);
   }
   // "events:" line -- the columns depend on which simulations ran.
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                                  "Bc Bcm Bi Bim\n");
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                                  "\n");
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
   }
   else {
      VG_(fprintf)(fp, "\nevents: Ir\n");
   }

   // Traverse every lineCC
   VG_(OSetGen_ResetIter)(CC_table);
   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
      Bool just_hit_a_new_file = False;
      // If we've hit a new file, print a "fl=" line.  Note that because
      // each string is stored exactly once in the string table, we can use
      // pointer comparison rather than strcmp() to test for equality, which
      // is good because most of the time the comparisons are equal and so
      // the whole strings would have to be checked.
      if ( lineCC->loc.file != currFile ) {
         currFile = lineCC->loc.file;
         VG_(fprintf)(fp, "fl=%s\n", currFile);
         distinct_files++;
         just_hit_a_new_file = True;
      }
      // If we've hit a new function, print a "fn=" line.  We know to do
      // this when the function name changes, and also every time we hit a
      // new file (in which case the new function name might be the same as
      // in the old file, hence the just_hit_a_new_file test).
      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
         currFn = lineCC->loc.fn;
         VG_(fprintf)(fp, "fn=%s\n", currFn);
         distinct_fns++;
      }

      // Print the LineCC
      if (clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp,  "%d %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
                            lineCC->Bc.b, lineCC->Bc.mp,
                            lineCC->Bi.b, lineCC->Bi.mp);
      }
      else if (clo_cache_sim && !clo_branch_sim) {
         VG_(fprintf)(fp,  "%d %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
      }
      else if (!clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp,  "%d %llu"
                             " %llu %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a,
                            lineCC->Bc.b, lineCC->Bc.mp,
                            lineCC->Bi.b, lineCC->Bi.mp);
      }
      else {
         VG_(fprintf)(fp,  "%d %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a);
      }

      // Update summary stats
      Ir_total.a  += lineCC->Ir.a;
      Ir_total.m1 += lineCC->Ir.m1;
      Ir_total.mL += lineCC->Ir.mL;
      Dr_total.a  += lineCC->Dr.a;
      Dr_total.m1 += lineCC->Dr.m1;
      Dr_total.mL += lineCC->Dr.mL;
      Dw_total.a  += lineCC->Dw.a;
      Dw_total.m1 += lineCC->Dw.m1;
      Dw_total.mL += lineCC->Dw.mL;
      Bc_total.b  += lineCC->Bc.b;
      Bc_total.mp += lineCC->Bc.mp;
      Bi_total.b  += lineCC->Bi.b;
      Bi_total.mp += lineCC->Bi.mp;

      distinct_lines++;
   }

   // Summary stats must come after rest of table, since we calculate them
   // during traversal.  */
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp,  "summary:"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu %llu\n",
                        Ir_total.a, Ir_total.m1, Ir_total.mL,
                        Dr_total.a, Dr_total.m1, Dr_total.mL,
                        Dw_total.a, Dw_total.m1, Dw_total.mL,
                        Bc_total.b, Bc_total.mp,
                        Bi_total.b, Bi_total.mp);
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp,  "summary:"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu\n",
                        Ir_total.a, Ir_total.m1, Ir_total.mL,
                        Dr_total.a, Dr_total.m1, Dr_total.mL,
                        Dw_total.a, Dw_total.m1, Dw_total.mL);
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp,  "summary:"
                        " %llu"
                        " %llu %llu %llu %llu\n",
                        Ir_total.a,
                        Bc_total.b, Bc_total.mp,
                        Bi_total.b, Bi_total.mp);
   }
   else {
      VG_(fprintf)(fp, "summary:"
                        " %llu\n",
                        Ir_total.a);
   }

   VG_(fclose)(fp);
}
1552 
ULong_width(ULong n)1553 static UInt ULong_width(ULong n)
1554 {
1555    UInt w = 0;
1556    while (n > 0) {
1557       n = n / 10;
1558       w++;
1559    }
1560    if (w == 0) w = 1;
1561    return w + (w-1)/3;   // add space for commas
1562 }
1563 
/* Final reporting, run at client exit.  Dumps the per-line cost-centre
   file via fprint_CC_table_and_calc_totals() (which also fills in the
   file-scope *_total values), then prints the overall I/D/LL cache and
   branch-prediction summaries to the user.  Several totals are clamped
   to 1 below purely to avoid division by zero in the rate calculations. */
static void cg_fini(Int exitcode)
{
   static HChar fmt[128];   // OK; large enough

   CacheCC  D_total;
   BranchCC B_total;
   ULong LL_total_m, LL_total_mr, LL_total_mw,
         LL_total, LL_total_r, LL_total_w;
   Int l1, l2, l3;

   // Write the output file and compute Ir_total, Dr_total, etc.
   fprint_CC_table_and_calc_totals();

   if (VG_(clo_verbosity) == 0)
      return;

   // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))

   /* I cache results.  Use the I_refs value to determine the first column
    * width. */
   l1 = ULong_width(Ir_total.a);
   l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I   refs:     ", Ir_total.a);

   /* If cache profiling is enabled, show D access numbers and all
      miss numbers */
   if (clo_cache_sim) {
      VG_(umsg)(fmt, "I1  misses:   ", Ir_total.m1);
      VG_(umsg)(fmt, "LLi misses:   ", Ir_total.mL);

      // Clamp to 1 so the miss-rate divisions below are well defined.
      if (0 == Ir_total.a) Ir_total.a = 1;
      VG_(umsg)("I1  miss rate: %*.2f%%\n", l1,
                Ir_total.m1 * 100.0 / Ir_total.a);
      VG_(umsg)("LLi miss rate: %*.2f%%\n", l1,
                Ir_total.mL * 100.0 / Ir_total.a);
      VG_(umsg)("\n");

      /* D cache results.  Use the D_refs.rd and D_refs.wr values to
       * determine the width of columns 2 & 3. */
      D_total.a  = Dr_total.a  + Dw_total.a;
      D_total.m1 = Dr_total.m1 + Dw_total.m1;
      D_total.mL = Dr_total.mL + Dw_total.mL;

      /* Make format string, getting width right for numbers.  This fmt
         (total + rd/wr breakdown) is reused for the LL lines below. */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
                        l1, l2, l3);

      VG_(umsg)(fmt, "D   refs:     ",
                     D_total.a, Dr_total.a, Dw_total.a);
      VG_(umsg)(fmt, "D1  misses:   ",
                     D_total.m1, Dr_total.m1, Dw_total.m1);
      VG_(umsg)(fmt, "LLd misses:   ",
                     D_total.mL, Dr_total.mL, Dw_total.mL);

      // Same division-by-zero guard as for Ir_total.a above.
      if (0 == D_total.a)  D_total.a = 1;
      if (0 == Dr_total.a) Dr_total.a = 1;
      if (0 == Dw_total.a) Dw_total.a = 1;
      VG_(umsg)("D1  miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, D_total.m1  * 100.0 / D_total.a,
                l2, Dr_total.m1 * 100.0 / Dr_total.a,
                l3, Dw_total.m1 * 100.0 / Dw_total.a);
      VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, D_total.mL  * 100.0 / D_total.a,
                l2, Dr_total.mL * 100.0 / Dr_total.a,
                l3, Dw_total.mL * 100.0 / Dw_total.a);
      VG_(umsg)("\n");

      /* LL overall results */

      // LL references are, by definition, the L1 misses (I1 + D1).
      LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
      LL_total_r = Dr_total.m1 + Ir_total.m1;
      LL_total_w = Dw_total.m1;
      VG_(umsg)(fmt, "LL refs:      ",
                     LL_total, LL_total_r, LL_total_w);

      LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
      LL_total_mr = Dr_total.mL + Ir_total.mL;
      LL_total_mw = Dw_total.mL;
      VG_(umsg)(fmt, "LL misses:    ",
                     LL_total_m, LL_total_mr, LL_total_mw);

      // LL miss rate is relative to ALL memory references, not LL refs.
      VG_(umsg)("LL miss rate:  %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, LL_total_m  * 100.0 / (Ir_total.a + D_total.a),
                l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a),
                l3, LL_total_mw * 100.0 / Dw_total.a);
   }

   /* If branch profiling is enabled, show branch overall results. */
   if (clo_branch_sim) {
      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
                        l1, l2, l3);

      // Nb: clamping happens before the sums, so B_total.b includes the
      // clamped (1) values when a counter was zero.
      if (0 == Bc_total.b)  Bc_total.b = 1;
      if (0 == Bi_total.b)  Bi_total.b = 1;
      B_total.b  = Bc_total.b  + Bi_total.b;
      B_total.mp = Bc_total.mp + Bi_total.mp;

      VG_(umsg)("\n");
      VG_(umsg)(fmt, "Branches:     ",
                     B_total.b, Bc_total.b, Bi_total.b);

      VG_(umsg)(fmt, "Mispredicts:  ",
                     B_total.mp, Bc_total.mp, Bi_total.mp);

      VG_(umsg)("Mispred rate:  %*.1f%% (%*.1f%%     + %*.1f%%   )\n",
                l1, B_total.mp  * 100.0 / B_total.b,
                l2, Bc_total.mp * 100.0 / Bc_total.b,
                l3, Bi_total.mp * 100.0 / Bi_total.b);
   }

   // Various stats, shown only with --stats=yes.
   if (VG_(clo_stats)) {
      Int debug_lookups = full_debugs      + fn_debugs +
                          file_line_debugs + no_debugs;

      VG_(dmsg)("\n");
      VG_(dmsg)("cachegrind: distinct files     : %d\n", distinct_files);
      VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
      VG_(dmsg)("cachegrind: distinct lines     : %d\n", distinct_lines);
      VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
      VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
      VG_(dmsg)("cachegrind: debug lookups      : %d\n", debug_lookups);

      VG_(dmsg)("cachegrind: with full      info:%6.1f%% (%d)\n",
                full_debugs * 100.0 / debug_lookups, full_debugs);
      VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n",
                file_line_debugs * 100.0 / debug_lookups, file_line_debugs);
      VG_(dmsg)("cachegrind: with fn name   info:%6.1f%% (%d)\n",
                fn_debugs * 100.0 / debug_lookups, fn_debugs);
      VG_(dmsg)("cachegrind: with zero      info:%6.1f%% (%d)\n",
                no_debugs * 100.0 / debug_lookups, no_debugs);

      VG_(dmsg)("cachegrind: string table size: %u\n",
                VG_(OSetGen_Size)(stringTable));
      VG_(dmsg)("cachegrind: CC table size: %u\n",
                VG_(OSetGen_Size)(CC_table));
      VG_(dmsg)("cachegrind: InstrInfo table size: %u\n",
                VG_(OSetGen_Size)(instrInfoTable));
   }
}
1711 
1712 /*--------------------------------------------------------------------*/
1713 /*--- Discarding BB info                                           ---*/
1714 /*--------------------------------------------------------------------*/
1715 
1716 // Called when a translation is removed from the translation cache for
1717 // any reason at all: to free up space, because the guest code was
1718 // unmapped or modified, or for any arbitrary reason.
1719 static
cg_discard_superblock_info(Addr orig_addr64,VexGuestExtents vge)1720 void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge )
1721 {
1722    SB_info* sbInfo;
1723    Addr     orig_addr = vge.base[0];
1724 
1725    tl_assert(vge.n_used > 0);
1726 
1727    if (DEBUG_CG)
1728       VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
1729                    (void*)orig_addr,
1730                    (void*)vge.base[0], (ULong)vge.len[0]);
1731 
1732    // Get BB info, remove from table, free BB info.  Simple!  Note that we
1733    // use orig_addr, not the first instruction address in vge.
1734    sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
1735    tl_assert(NULL != sbInfo);
1736    VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
1737 }
1738 
1739 /*--------------------------------------------------------------------*/
1740 /*--- Command line processing                                      ---*/
1741 /*--------------------------------------------------------------------*/
1742 
cg_process_cmd_line_option(const HChar * arg)1743 static Bool cg_process_cmd_line_option(const HChar* arg)
1744 {
1745    if (VG_(str_clo_cache_opt)(arg,
1746                               &clo_I1_cache,
1747                               &clo_D1_cache,
1748                               &clo_LL_cache)) {}
1749 
1750    else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1751    else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
1752    else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
1753    else
1754       return False;
1755 
1756    return True;
1757 }
1758 
cg_print_usage(void)1759 static void cg_print_usage(void)
1760 {
1761    VG_(print_cache_clo_opts)();
1762    VG_(printf)(
1763 "    --cache-sim=yes|no               collect cache stats? [yes]\n"
1764 "    --branch-sim=yes|no              collect branch prediction stats? [no]\n"
1765 "    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
1766    );
1767 }
1768 
// Print the tool-specific part of --help-debug output.  Cachegrind has
// no debugging-only options.
static void cg_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
1775 
1776 /*--------------------------------------------------------------------*/
1777 /*--- Setup                                                        ---*/
1778 /*--------------------------------------------------------------------*/
1779 
1780 static void cg_post_clo_init(void); /* just below */
1781 
// Tool registration entry point, run by the core before command-line
// processing.  Registers Cachegrind's identity, its main callbacks
// (post-CLO init, instrumentation, finalisation) and its option/usage
// handlers.
static void cg_pre_clo_init(void)
{
   VG_(details_name)            ("Cachegrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a cache and branch-prediction profiler");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   // Pick the cheaper guest-register-update policy as the default;
   // the user can still override it on the command line.
   VG_(clo_vex_control).iropt_register_updates_default
      = VG_(clo_px_file_backed)
      = VexRegUpdSpAtMemAccess; // overridable by the user.

   VG_(basic_tool_funcs)          (cg_post_clo_init,
                                   cg_instrument,
                                   cg_fini);

   // We must be told when translations are discarded, so the
   // per-superblock instr-info can be freed (cg_discard_superblock_info).
   VG_(needs_superblock_discards)(cg_discard_superblock_info);
   VG_(needs_command_line_options)(cg_process_cmd_line_option,
                                   cg_print_usage,
                                   cg_print_debug_usage);
}
1805 
cg_post_clo_init(void)1806 static void cg_post_clo_init(void)
1807 {
1808    cache_t I1c, D1c, LLc;
1809 
1810    CC_table =
1811       VG_(OSetGen_Create)(offsetof(LineCC, loc),
1812                           cmp_CodeLoc_LineCC,
1813                           VG_(malloc), "cg.main.cpci.1",
1814                           VG_(free));
1815    instrInfoTable =
1816       VG_(OSetGen_Create)(/*keyOff*/0,
1817                           NULL,
1818                           VG_(malloc), "cg.main.cpci.2",
1819                           VG_(free));
1820    stringTable =
1821       VG_(OSetGen_Create)(/*keyOff*/0,
1822                           stringCmp,
1823                           VG_(malloc), "cg.main.cpci.3",
1824                           VG_(free));
1825 
1826    VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
1827                                        &clo_I1_cache,
1828                                        &clo_D1_cache,
1829                                        &clo_LL_cache);
1830 
1831    // min_line_size is used to make sure that we never feed
1832    // accesses to the simulator straddling more than two
1833    // cache lines at any cache level
1834    min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
1835    min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
1836 
1837    Int largest_load_or_store_size
1838       = VG_(machine_get_size_of_largest_guest_register)();
1839    if (min_line_size < largest_load_or_store_size) {
1840       /* We can't continue, because the cache simulation might
1841          straddle more than 2 lines, and it will assert.  So let's
1842          just stop before we start. */
1843       VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
1844                 (Int)min_line_size);
1845       VG_(umsg)("  must be equal to or larger than the maximum register size (%d)\n",
1846                 largest_load_or_store_size );
1847       VG_(umsg)("  but it is not.  Exiting now.\n");
1848       VG_(exit)(1);
1849    }
1850 
1851    cachesim_initcaches(I1c, D1c, LLc);
1852 }
1853 
// Register cg_pre_clo_init as this tool's entry point with the core.
VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1855 
1856 /*--------------------------------------------------------------------*/
1857 /*--- end                                                          ---*/
1858 /*--------------------------------------------------------------------*/
1859 
1860