
/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker.                             ---*/
/*--- This file checks stack and global array accesses.            ---*/
/*---                                                    sg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Ptrcheck, a Valgrind tool for checking pointer
   use in programs.

   Copyright (C) 2008-2017 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_machine.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_options.h"

#include "pc_common.h"

#include "sg_main.h"      // self


static
void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/


//////////////////////////////////////////////////////////////
//                                                          //
// Basic Stuff                                              //
//                                                          //
//////////////////////////////////////////////////////////////

static inline Bool is_sane_TId ( ThreadId tid )
{
   return tid >= 0 && tid < VG_N_THREADS
          && tid != VG_INVALID_THREADID;
}

static void* sg_malloc ( const HChar* cc, SizeT n ) {
   void* p;
   tl_assert(n > 0);
   p = VG_(malloc)( cc, n );
   return p;
}

static void sg_free ( void* p ) {
   tl_assert(p);
   VG_(free)(p);
}


/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  The redundant casting is paranoia,
   following what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
inline
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
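
/* Illustrative sketch (not part of the tool): the three possible
   outcomes of cmp_nonempty_intervals.  The concrete addresses and
   sizes below are invented purely for illustration. */
#if 0
static void cmp_nonempty_intervals_example ( void )
{
   /* [0x1000,0x1010) lies entirely below [0x1010,0x1020): -1 */
   tl_assert(cmp_nonempty_intervals(0x1000, 0x10, 0x1010, 0x10) == -1);
   /* ... and the other way around: 1 */
   tl_assert(cmp_nonempty_intervals(0x1010, 0x10, 0x1000, 0x10) == 1);
   /* any overlap at all, even a single byte, compares "equal": 0 */
   tl_assert(cmp_nonempty_intervals(0x1000, 0x11, 0x1010, 0x10) == 0);
}
#endif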

/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
   within [aBig,aBig+nBig). */
inline
static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
                                Addr aSmall, SizeT nSmall ) {
   tl_assert(nBig > 0 && nSmall > 0);
   return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
}

inline
static Addr Addr__min ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a1 : a2;
}

inline
static Addr Addr__max ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a2 : a1;
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackBlocks Persistent Cache                             //
//                                                          //
//////////////////////////////////////////////////////////////

/* We maintain a set of XArray* of StackBlocks.  These are never
   freed.  When a new StackBlock vector is acquired from
   VG_(di_get_stack_blocks_at_ip), we compare it to the existing set.
   If not present, it is added.  If present, the just-acquired one is
   freed and the copy used.

   This simplifies storage management elsewhere.  It allows us to
   assume that a pointer to an XArray* of StackBlock is valid forever.
   It also means there are no duplicates anywhere, which could be
   important from a space point of view for programs that generate a
   lot of translations, or where translations are frequently discarded
   and re-made.

   Note that we normalise the arrays by sorting the elements according
   to an arbitrary total order, so as to avoid the situation that two
   vectors describe the same set of variables but are not structurally
   identical. */

static inline Bool StackBlock__sane ( const StackBlock* fb )
{
   if (fb->name[ sizeof(fb->name)-1 ] != 0)
      return False;
   if (fb->spRel != False && fb->spRel != True)
      return False;
   if (fb->isVec != False && fb->isVec != True)
      return False;
   return True;
}

/* Generate an arbitrary total ordering on StackBlocks. */
static Word StackBlock__cmp ( const StackBlock* fb1, const StackBlock* fb2 )
{
   Word r;
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   /* Hopefully the .base test hits most of the time.  For the blocks
      associated with any particular instruction, if the .base values
      are the same then probably it doesn't make sense for the other
      fields to be different.  But this is supposed to be a completely
      general structural total order, so we have to compare everything
      anyway. */
   if (fb1->base < fb2->base) return -1;
   if (fb1->base > fb2->base) return 1;
   /* compare sizes */
   if (fb1->szB < fb2->szB) return -1;
   if (fb1->szB > fb2->szB) return 1;
   /* compare sp/fp flag */
   if (fb1->spRel < fb2->spRel) return -1;
   if (fb1->spRel > fb2->spRel) return 1;
   /* compare is/is-not array-typed flag */
   if (fb1->isVec < fb2->isVec) return -1;
   if (fb1->isVec > fb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(fb1->name, fb2->name);
   return r;
}

/* Returns True if all fields except .szB are the same.  szBs may or
   may not be the same; they are simply not consulted. */
static Bool StackBlock__all_fields_except_szB_are_equal (
               StackBlock* fb1,
               StackBlock* fb2
            )
{
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   return fb1->base == fb2->base
          && fb1->spRel == fb2->spRel
          && fb1->isVec == fb2->isVec
          && 0 == VG_(strcmp)(fb1->name, fb2->name);
}


/* Generate an arbitrary total ordering on vectors of StackBlocks. */
static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
{
   Word i, r, n1, n2;
   n1 = VG_(sizeXA)( fb1s );
   n2 = VG_(sizeXA)( fb2s );
   if (n1 < n2) return -1;
   if (n1 > n2) return 1;
   for (i = 0; i < n1; i++) {
      StackBlock *fb1, *fb2;
      fb1 = VG_(indexXA)( fb1s, i );
      fb2 = VG_(indexXA)( fb2s, i );
      r = StackBlock__cmp( fb1, fb2 );
      if (r != 0) return r;
   }
   tl_assert(i == n1 && i == n2);
   return 0;
}

static void pp_StackBlocks ( XArray* sbs )
{
   Word i, n = VG_(sizeXA)( sbs );
   VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
   for (i = 0; i < n; i++) {
      StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
      VG_(message)(Vg_DebugMsg,
         "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
         sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
         sb->isVec ? 'Y' : 'N', &sb->name[0]
      );
   }
   VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
}


/* ---------- The StackBlock vector cache ---------- */

static WordFM* /* XArray* of StackBlock -> nothing */
       frameBlocks_set = NULL;

static void init_StackBlocks_set ( void )
{
   tl_assert(!frameBlocks_set);
   frameBlocks_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
                    (Word(*)(UWord,UWord))StackBlocks__cmp );
   tl_assert(frameBlocks_set);
}

/* Find the given StackBlock-vector in our collection thereof.  If
   found, deallocate the supplied one, and return the address of the
   copy.  If not found, add the supplied one to our collection and
   return its address. */
static XArray* /* of StackBlock */
       StackBlocks__find_and_dealloc__or_add
          ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above. */
   VG_(setCmpFnXA)( orig, (XACmpFn_t)StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0);
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In that
      case we use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.  Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* If there are any blocks which overlap and have the same
      spRel-ness, junk the whole descriptor; it's obviously bogus.
      Icc11 certainly generates bogus info from time to time.

      This check is pretty weak; really we ought to have a stronger
      sanity check. */
   { Word i, n = VG_(sizeXA)( orig );
     static Int moans = 3;
     for (i = 0; i < n-1; i++) {
       StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
       StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
       if (sb1->spRel == sb2->spRel
           && (sb1->base >= sb2->base
               || sb1->base + sb1->szB > sb2->base)) {
          if (moans > 0 && !VG_(clo_xml)) {
             moans--;
             VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                                      "overlapping stack blocks\n");
             if (VG_(clo_verbosity) >= 2)
                pp_StackBlocks(orig);
             if (moans == 0)
                VG_(message)(Vg_UserMsg, "Further instances of this "
                                         "message will not be shown\n" );
          }
          VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
          break;
       }
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}

/* Top level function for getting the StackBlock vector for a given
   instruction.  It is guaranteed that the returned pointer will be
   valid for the entire rest of the run, and also that the addresses
   of the individual elements of the array will not change. */

static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
   tl_assert(blocks);
   return StackBlocks__find_and_dealloc__or_add( blocks );
}
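
/* Illustrative sketch (not part of the tool): how a caller can rely
   on the interning guarantee described above.  'some_ip' is a
   made-up address. */
#if 0
static void get_StackBlocks_for_IP_example ( void )
{
   Addr some_ip = 0x400123;  /* hypothetical instruction address */
   XArray* sbs1 = get_StackBlocks_for_IP(some_ip);
   XArray* sbs2 = get_StackBlocks_for_IP(some_ip);
   /* Because results are interned in frameBlocks_set, asking twice
      for the same IP yields pointer-identical arrays, and the caller
      must never free them. */
   tl_assert(sbs1 == sbs2);
}
#endif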


//////////////////////////////////////////////////////////////
//                                                          //
// GlobalBlocks Persistent Cache                            //
//                                                          //
//////////////////////////////////////////////////////////////

/* Generate an arbitrary total ordering on GlobalBlocks. */
static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
{
   Word r;
   /* compare addrs */
   if (gb1->addr < gb2->addr) return -1;
   if (gb1->addr > gb2->addr) return 1;
   /* compare sizes */
   if (gb1->szB < gb2->szB) return -1;
   if (gb1->szB > gb2->szB) return 1;
   /* compare is/is-not array-typed flag */
   if (gb1->isVec < gb2->isVec) return -1;
   if (gb1->isVec > gb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(gb1->name, gb2->name);
   if (r != 0) return r;
   /* compare the soname */
   r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
   return r;
}

static WordFM* /* GlobalBlock* -> nothing */
       globalBlock_set = NULL;

static void init_GlobalBlock_set ( void )
{
   tl_assert(!globalBlock_set);
   globalBlock_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
                    (Word(*)(UWord,UWord))GlobalBlock__cmp );
   tl_assert(globalBlock_set);
}


/* Top level function for making GlobalBlocks persistent.  Call here
   with a non-persistent version, and the returned one is guaranteed
   to be valid for the entire rest of the run.  The supplied one is
   copied, not stored, so can be freed after the call. */

static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
   UWord key, val;
   /* Now, do we have it already? */
   if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
      /* yes, return the copy */
      GlobalBlock* res;
      tl_assert(val == 0);
      res = (GlobalBlock*)key;
      tl_assert(res != orig);
      return res;
   } else {
      /* no.  clone it, store the clone and return the clone's
         address. */
      GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
                                      sizeof(GlobalBlock) );
      tl_assert(clone);
      *clone = *orig;
      VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
      return clone;
   }
}
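
/* Illustrative sketch (not part of the tool): because the argument is
   copied rather than stored, a caller may pass a short-lived block.
   Field names are as declared for GlobalBlock in pub_tool_debuginfo.h;
   the concrete values are invented for illustration. */
#if 0
static GlobalBlock* persistent_GlobalBlock_example ( void )
{
   GlobalBlock tmp;            /* on the stack; dies at return */
   VG_(memset)(&tmp, 0, sizeof(tmp));
   tmp.addr  = 0x601000;       /* hypothetical global array */
   tmp.szB   = 64;
   tmp.isVec = True;
   VG_(strcpy)(tmp.name,   "arr");
   VG_(strcpy)(tmp.soname, "NONE");
   /* The returned pointer stays valid for the rest of the run, even
      though 'tmp' itself is about to disappear. */
   return get_persistent_GlobalBlock(&tmp);
}
#endif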


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of StackTreeBlock                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
   are not allowed.

   A stack interval tree is a (WordFM StackTreeNode* void).  There is
   one stack interval tree for each thread.
*/
typedef
   struct {
      Addr        addr;
      SizeT       szB;   /* copied from .descr->szB */
      StackBlock* descr; /* it's an instance of this block */
      UWord       depth; /* depth of stack at time block was pushed */
   }
   StackTreeNode;

static void pp_StackTree ( WordFM* sitree, const HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
   VG_(initIterFM)( sitree );
   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
      StackTreeNode* nd = (StackTreeNode*)keyW;
      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
                  nd->descr, nd->descr->name, nd->descr->szB);
   }
   VG_(printf)(">>> END   pp_StackTree %s\n", who );
}

/* Interval comparison function for StackTreeNode */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
                                          StackTreeNode* sn2 )
{
   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
                                 sn2->addr, sn2->szB);
}

/* Find the node holding 'a', if any. */
static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
{
   UWord keyW, valW;
   StackTreeNode key;
   tl_assert(sitree);
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
      StackTreeNode* res = (StackTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}
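
/* Illustrative note (not part of the tool): the lookup above probes
   with a degenerate one-byte interval [a,a+1).  Because the tree's
   comparison function treats any overlap as equality, that probe
   "equals" whichever stored block contains 'a', if any.  A trivial
   hypothetical wrapper: */
#if 0
static Bool stack_tree_covers_addr_example ( WordFM* sitree, Addr a )
{
   /* True iff some block in the tree covers address 'a'. */
   return find_StackTreeNode(sitree, a) != NULL;
}
#endif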

/* Note that the supplied XArray of FrameBlock must have been
   made persistent already. */
__attribute__((noinline))
static void add_blocks_to_StackTree (
               /*MOD*/WordFM* sitree,
               XArray* /* FrameBlock */ descrs,
               XArray* /* Addr */ bases,
               UWord depth
            )
{
   Bool debug = (Bool)0;
   Word i, nDescrs, nBases;

   nDescrs = VG_(sizeXA)( descrs );
   nBases = VG_(sizeXA)( bases );
   tl_assert(nDescrs == nBases);

   if (nDescrs == 0) return;

   tl_assert(sitree);
   if (debug) {
      VG_(printf)("\ndepth = %lu\n", depth);
      pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
      pp_StackBlocks(descrs);
   }

   for (i = 0; i < nDescrs; i++) {
      Bool already_present;
      StackTreeNode* nyu;
      Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
      StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
      tl_assert(descr->szB > 0);
      nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
      nyu->addr  = addr;
      nyu->szB   = descr->szB;
      nyu->descr = descr;
      nyu->depth = depth;
      if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
      already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
      /* The interval can't already be there; else we have
         overlapping stack blocks. */
      tl_assert(!already_present);
      if (debug) {
         pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
      }
   }
   if (debug) {
      pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
      VG_(printf)("\n");
   }
}

static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
                                        XArray* /* Addr */ bases )
{
   UWord oldK, oldV;
   Word i, nBases = VG_(sizeXA)( bases );
   for (i = 0; i < nBases; i++) {
      Bool b;
      Addr addr = *(Addr*)VG_(indexXA)( bases, i );
      StackTreeNode* nd = find_StackTreeNode(sitree, addr);
      /* The interval must be there; we added it earlier when
         the associated frame was created. */
      tl_assert(nd);
      b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
      /* we just found the block! */
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(nd == (StackTreeNode*)oldK);
      sg_free(nd);
   }
}


static void delete_StackTree__kFin ( UWord keyW ) {
   StackTreeNode* nd = (StackTreeNode*)keyW;
   tl_assert(nd);
   sg_free(nd);
}
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                 delete_StackTree__kFin, delete_StackTree__vFin );
}

static WordFM* new_StackTree ( void ) {
   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of GlobalTreeBlock                         //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a global interval tree.  Zero length intervals
   (.szB == 0) are not allowed.

   A global interval tree is a (WordFM GlobalTreeNode* void).  There
   is one global interval tree for the entire process.
*/
typedef
   struct {
      Addr         addr;  /* copied from .descr->addr */
      SizeT        szB;   /* copied from .descr->szB */
      GlobalBlock* descr; /* it's this block */
   }
   GlobalTreeNode;

static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
   tl_assert(nd->descr);
   VG_(printf)("GTNode [%#lx,+%lu) %s",
               nd->addr, nd->szB, nd->descr->name);
}

static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
                             const HChar* who )
{
   UWord keyW, valW;
   GlobalTreeNode* nd;
   VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
   VG_(initIterFM)( gitree );
   while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      VG_(printf)("  ");
      GlobalTreeNode__pp(nd);
      VG_(printf)("\n");
   }
   VG_(doneIterFM)( gitree );
   VG_(printf)(">>>\n");
}

/* Interval comparison function for GlobalTreeNode */
static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
                                           GlobalTreeNode* gn2 )
{
   return cmp_nonempty_intervals( gn1->addr, gn1->szB,
                                  gn2->addr, gn2->szB );
}

/* Find the node holding 'a', if any. */
static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
{
   UWord keyW, valW;
   GlobalTreeNode key;
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      GlobalTreeNode* res = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied GlobalBlock must have been made persistent
   already. */
static void add_block_to_GlobalTree (
               /*MOD*/WordFM* gitree,
               GlobalBlock* descr
            )
{
   Bool already_present;
   GlobalTreeNode *nyu, *nd;
   UWord keyW, valW;
   static Int moans = 3;

   tl_assert(descr->szB > 0);
   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
   nyu->addr  = descr->addr;
   nyu->szB   = descr->szB;
   nyu->descr = descr;

   /* Basically it's an error to add a global block to the tree that
      is already in the tree.  However, detect and ignore attempts to
      insert exact duplicates; they do appear for some reason
      (possibly a bug in m_debuginfo?) */
   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
   if (already_present) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      tl_assert(nd);
      tl_assert(nd != nyu);
      tl_assert(nd->descr);
      tl_assert(nyu->descr);
      if (nd->addr == nyu->addr && nd->szB == nyu->szB
          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
          /* Although it seems reasonable to demand that duplicate
             blocks have identical names, that is too strict.  For
             example, reading debuginfo from glibc produces two
             otherwise identical blocks with names "tzname" and
             "__tzname".  A constraint of the form "must be identical,
             or one must be a substring of the other" would fix that.
             However, such trickery is scuppered by the fact that we
             truncate all variable names to 15 characters to make
             storage management simpler, hence giving pairs like
             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
             it's simplest just to skip the name comparison
             completely. */
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the assertion below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   /* Unfortunately (25 Jan 09) at least icc11 has been seen to
      generate overlapping block descriptions in the Dwarf3; clearly
      bogus. */
   if (already_present && moans > 0 && !VG_(clo_xml)) {
      moans--;
      VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                               "overlapping global blocks\n");
      if (VG_(clo_verbosity) >= 2) {
         GlobalTree__pp( gitree,
                         "add_block_to_GlobalTree: non-exact duplicate" );
         VG_(printf)("Overlapping block: ");
         GlobalTreeNode__pp(nyu);
         VG_(printf)("\n");
      }
      if (moans == 0)
         VG_(message)(Vg_UserMsg, "Further instances of this "
                                  "message will not be shown\n" );
   }
   /* tl_assert(!already_present); */
}

static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB  = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}


//////////////////////////////////////////////////////////////
//                                                          //
// Invar                                                    //
//                                                          //
//////////////////////////////////////////////////////////////

/* An invariant, as resulting from watching the destination of a
   memory referencing instruction.  It is Inv_Unset until the
   instruction makes its first access. */

typedef
   enum {
      Inv_Unset=1,  /* not established yet */
      Inv_Unknown,  /* unknown location */
      Inv_Stack0,   /* array-typed stack block in innermost frame */
      Inv_StackN,   /* array-typed stack block in non-innermost frame */
      Inv_Global,   /* array-typed global block */
   }
   InvarTag;

typedef
   struct {
      InvarTag tag;
      union {
         struct {
         } Unset;
         struct {
         } Unknown;
         struct {
            Addr  addr;
            SizeT szB;
            StackBlock* descr;
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
               this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
            /* Pointer to a GlobalBlock in the interval tree of
               global blocks. */
            GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;

/* Partial debugging printing for an Invar. */
static void pp_Invar ( Invar* i )
{
   switch (i->tag) {
      case Inv_Unset:
         VG_(printf)("Unset");
         break;
      case Inv_Unknown:
         VG_(printf)("Unknown");
         break;
      case Inv_Stack0:
         VG_(printf)("Stack0 [%#lx,+%lu)",
                     i->Inv.Stack0.addr, i->Inv.Stack0.szB);
         break;
      case Inv_StackN:
         VG_(printf)("StackN [%#lx,+%lu)",
                     i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
         break;
      case Inv_Global:
         VG_(printf)("Global [%#lx,+%lu)",
                     i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
         break;
      default:
         tl_assert(0);
   }
}

/* Compare two Invars for equality. */
static Bool eq_Invar ( Invar* i1, Invar* i2 )
{
   if (i1->tag != i2->tag)
      return False;
   switch (i1->tag) {
      case Inv_Unset:
         return True;
      case Inv_Unknown:
         return True;
      case Inv_Stack0:
         return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
                && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
      case Inv_StackN:
         return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
      case Inv_Global:
         return i1->Inv.Global.nd == i2->Inv.Global.nd;
      default:
         tl_assert(0);
   }
   /*NOTREACHED*/
   tl_assert(0);
}

/* Generate a piece of text showing 'ea' is relative to 'invar', if
   known.  If unknown, generate an empty string.  'buf' must be at
   least 32 bytes in size.  Also return the absolute value of the
   delta, if known, or zero if not known.
*/
static void gen_delta_str ( /*OUT*/HChar* buf,
                            /*OUT*/UWord* absDelta,
                            Invar* inv, Addr ea )
{
   Addr  block = 0;
   SizeT szB   = 0;

   buf[0] = 0;
   *absDelta = 0;

   switch (inv->tag) {
      case Inv_Unknown:
      case Inv_Unset:
         return; /* unknown */
      case Inv_Stack0:
         block = inv->Inv.Stack0.addr;
         szB   = inv->Inv.Stack0.szB;
         break;
      case Inv_StackN:
         block = inv->Inv.StackN.nd->addr;
         szB   = inv->Inv.StackN.nd->szB;
         break;
      case Inv_Global:
         block = inv->Inv.Global.nd->addr;
         szB = inv->Inv.Global.nd->szB;
         break;
      default:
         tl_assert(0);
   }
   tl_assert(szB > 0);
   if (ea < block) {
      *absDelta = block - ea;
      VG_(sprintf)(buf, "%'lu before", *absDelta);
   }
   else if (ea >= block + szB) {
      *absDelta = ea - (block + szB);
      VG_(sprintf)(buf, "%'lu after", *absDelta);
   }
   else {
      // Leave *absDelta at zero.
      VG_(sprintf)(buf, "%'lu inside", ea - block);
   }
}


/* Print selected parts of an Invar, suitable for use in error
   messages. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   const HChar* str;
   tl_assert(nBuf >= 128);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
                      str, inv->Inv.Stack0.descr->name,
                      inv->Inv.Stack0.szB );
         break;
      case Inv_StackN:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                           inv->Inv.StackN.nd->descr->szB,
                           depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         str = "array";
         VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                           inv->Inv.Global.nd->descr->szB,
                           inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// our globals                                              //
//                                                          //
//////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////
///

#define N_QCACHE 16

/* Must be a power of two, since it is used as a bit mask below;
   anything else gives chaos */
#define QCACHE_ADVANCE_EVERY 16

/* Per-thread query cache.  Note that the invar can only be Inv_StackN
   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr  addr;
      SizeT szB;
      Invar inv;
   }
   QCElem;

typedef
   struct {
      Word   nInUse;
      QCElem elems[N_QCACHE];
   }
   QCache;
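
/* Illustrative sketch (not part of the tool): the lookup discipline a
   QCache is intended to support.  The real lookup is inlined into
   classify_address() below; this hypothetical helper just shows the
   policy: linear scan, and on a hit, occasionally swap the entry one
   slot towards the front, so hot entries migrate forwards cheaply. */
#if 0
static Invar* qcache_lookup_example ( QCache* qc, Addr ea, SizeT szB )
{
   static UWord ctr = 0;
   Word i;
   for (i = 0; i < qc->nInUse; i++) {
      if (is_subinterval_of(qc->elems[i].addr, qc->elems[i].szB,
                            ea, szB)) {
         /* hit; every QCACHE_ADVANCE_EVERY-th hit promotes the
            entry by one slot (hence the power-of-two mask above) */
         if (i > 0 && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
            QCElem tmp = qc->elems[i-1];
            qc->elems[i-1] = qc->elems[i];
            qc->elems[i] = tmp;
            return &qc->elems[i-1].inv;
         }
         return &qc->elems[i].inv;
      }
   }
   return NULL; /* miss; caller must do a full classification */
}
#endif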

static void QCache__invalidate ( QCache* qc ) {
   tl_assert(qc->nInUse >= 0);
   qc->nInUse = 0;
}

static void QCache__pp ( QCache* qc, const HChar* who )
{
   Word i;
   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   for (i = 0; i < qc->nInUse; i++) {
      VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
      pp_Invar(&qc->elems[i].inv);
      VG_(printf)("\n");
   }
   VG_(printf)(">>>\n");
}

static ULong stats__qcache_queries = 0;
static ULong stats__qcache_misses  = 0;
static ULong stats__qcache_probes  = 0;

///
//////////////////////////////////////////////////////////////

/* Each thread has:
   * a shadow stack of StackFrames, which is a double-linked list
   * a stack block interval tree
*/
static  struct _StackFrame**         shadowStacks;

static  WordFM** /* StackTreeNode */ siTrees;

static  QCache*                      qcaches;


/* Additionally, there is one global variable interval tree
   for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;


static void invalidate_all_QCaches ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      QCache__invalidate( &qcaches[i] );
   }
}

static void ourGlobals_init ( void )
{
   Word i;

   shadowStacks = sg_malloc( "di.sg_main.oGi.2",
                             VG_N_THREADS * sizeof shadowStacks[0] );
   siTrees = sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS * sizeof siTrees[0] );
   qcaches = sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS * sizeof qcaches[0] );

   for (i = 0; i < VG_N_THREADS; i++) {
      shadowStacks[i] = NULL;
      siTrees[i] = NULL;
      qcaches[i] = (QCache){};
   }
   invalidate_all_QCaches();
   giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
                        (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Handle global variable load/unload events                //
//                                                          //
//////////////////////////////////////////////////////////////

static void acquire_globals ( ULong di_handle )
{
   Word n, i;
   XArray* /* of GlobalBlock */ gbs;
   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   gbs = VG_(di_get_global_blocks_from_dihandle)
            (di_handle, True/*arrays only*/);
   if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));

   n = VG_(sizeXA)( gbs );
   for (i = 0; i < n; i++) {
      GlobalBlock* gbp;
      GlobalBlock* gb = VG_(indexXA)( gbs, i );
      if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
                         gb->szB, gb->addr, gb->soname, gb->name );
      tl_assert(gb->szB > 0);
      /* Make a persistent copy of each GlobalBlock, and add it
         to the tree. */
      gbp = get_persistent_GlobalBlock( gb );
      add_block_to_GlobalTree( giTree, gbp );
   }

   VG_(deleteXA)( gbs );
}


/* We only intercept these two because we need to see any di_handles
   that might arise from the mappings/allocations. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_die_mem_munmap ( Addr a, SizeT len )
{
   Bool debug = (Bool)0;
   Bool overlap = False;

   if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );

   if (len == 0)
      return;

   overlap = del_GlobalTree_range(giTree, a, len);

   { /* redundant sanity check */
     UWord keyW, valW;
     VG_(initIterFM)( giTree );
     while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
        GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
        tl_assert(valW == 0);
        tl_assert(nd->szB > 0);
        tl_assert(nd->addr + nd->szB <= a
                  || a + len <= nd->addr);
     }
     VG_(doneIterFM)( giTree );
   }

   if (!overlap)
      return;

   /* Ok, the range contained some blocks.  Therefore we'll need to
      visit all the Invars in all the thread shadow stacks, and
      convert all Inv_Global entries that intersect [a,a+len) to
      Inv_Unknown. */
   tl_assert(len > 0);
   preen_global_Invars( a, len );
   invalidate_all_QCaches();
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackFrame                                               //
//                                                          //
//////////////////////////////////////////////////////////////

static ULong stats__total_accesses   = 0;
static ULong stats__classify_Stack0  = 0;
static ULong stats__classify_StackN  = 0;
static ULong stats__classify_Global  = 0;
static ULong stats__classify_Unknown = 0;
static ULong stats__Invars_preened   = 0;
static ULong stats__Invars_changed   = 0;
static ULong stats__t_i_b_empty      = 0;
static ULong stats__htab_fast        = 0;
static ULong stats__htab_searches    = 0;
static ULong stats__htab_probes      = 0;
static ULong stats__htab_resizes     = 0;


/* A dynamic instance of an instruction */
typedef
   struct {
      /* IMMUTABLE */
      Addr    insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks; /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar;
   }
   IInstance;


#define N_HTAB_FIXED 64

typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  One critical thing is
         that an IInstance with a .insn_addr of zero is interpreted to
         mean that hash table slot is unused.  This means we can't
         store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord      htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;





/* Move this somewhere else? */
/* Visit all Invars in the entire system, and convert all Inv_Global
   Invars that intersect [a,a+len) to Inv_Unknown. */

__attribute__((noinline))
static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
{
   stats__Invars_preened++;
   tl_assert(len > 0);
   tl_assert(inv);
   switch (inv->tag) {
      case Inv_Global:
         tl_assert(inv->Inv.Global.nd);
         tl_assert(inv->Inv.Global.nd->szB > 0);
         if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
                            inv->Inv.Global.nd->addr,
                            inv->Inv.Global.nd->szB);
         if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
                                                 inv->Inv.Global.nd->szB)) {
            inv->tag = Inv_Unknown;
            stats__Invars_changed++;
         }
         break;
      case Inv_Stack0:
      case Inv_StackN:
      case Inv_Unknown:
         break;
      case Inv_Unset: /* this should never happen */
         /* fallthrough */
      default:
         tl_assert(0);
   }
}

__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
   Int         i;
   UWord       u;
   StackFrame* frame;
   tl_assert(len > 0);
   for (i = 0; i < VG_N_THREADS; i++) {
      frame = shadowStacks[i];
      if (!frame)
         continue; /* no frames for this thread */
      /* start from the innermost frame */
      while (frame->inner)
         frame = frame->inner;
      tl_assert(frame->outer);
      /* work through the frames from innermost to outermost.  The
         order isn't important; we just need to ensure we visit each
         frame once (including those which are not actually active,
         more 'inner' than the 'innermost active frame', viz, just
         hanging around waiting to be used, when the current innermost
         active frame makes more calls).  See comments on definition
         of struct _StackFrame. */
      for (; frame; frame = frame->outer) {
         UWord xx = 0; /* sanity check only; count of used htab entries */
         if (!frame->htab)
            continue; /* frame not in use.  See shadowStack_unwind(). */
         for (u = 0; u < frame->htab_size; u++) {
            IInstance* ii = &frame->htab[u];
            if (ii->insn_addr == 0)
               continue; /* not in use */
            if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
            preen_global_Invar( &ii->invar, a, len );
            xx++;
         }
         tl_assert(xx == frame->htab_used);
      }
   }
}


/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   return (ip >> 0) & (htab_size - 1);
}
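
/* Illustrative sketch (not part of the tool): the variant the XXX
   comment above has in mind.  On ppc32/64 every instruction is
   4-byte aligned, so the low two bits of 'ip' carry no information;
   shifting them out would spread entries more evenly over the
   table. */
#if 0
inline static UWord compute_II_hash_ppc ( Addr ip, UWord htab_size ) {
   return (ip >> 2) & (htab_size - 1);
}
#endif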
1326 
1327 __attribute__((noinline))
initialise_II_hash_table(StackFrame * sf)1328 static void initialise_II_hash_table ( StackFrame* sf )
1329 {
1330    UWord i;
1331    sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
1332    sf->htab = &sf->htab_fixed[0];
1333    tl_assert(sf->htab);
1334    sf->htab_used = 0;
1335    for (i = 0; i < sf->htab_size; i++)
1336       sf->htab[i].insn_addr = 0; /* NOT IN USE */
1337 }
1338 
1339 
1340 __attribute__((noinline))
resize_II_hash_table(StackFrame * sf)1341 static void resize_II_hash_table ( StackFrame* sf )
1342 {
1343    UWord     i, j, ix, old_size, new_size;
1344    IInstance *old_htab, *new_htab, *old;
1345 
1346    tl_assert(sf && sf->htab);
1347    old_size = sf->htab_size;
1348    new_size = 2 * old_size;
1349    old_htab = sf->htab;
1350    new_htab = sg_malloc( "di.sg_main.rIht.1",
1351                          new_size * sizeof(IInstance) );
1352    for (i = 0; i < new_size; i++) {
1353       new_htab[i].insn_addr = 0; /* NOT IN USE */
1354    }
1355    for (i = 0; i < old_size; i++) {
1356       old = &old_htab[i];
1357       if (old->insn_addr == 0 /* NOT IN USE */)
1358          continue;
1359       ix = compute_II_hash(old->insn_addr, new_size);
1360       /* find out where to put this, in the new table */
1361       j = new_size;
1362       while (1) {
1363          if (new_htab[ix].insn_addr == 0)
1364             break;
1365          /* This can't ever happen, because it would mean the new
1366             table is full; that isn't allowed -- even the old table is
1367             only allowed to become half full. */
1368          tl_assert(j > 0);
1369          j--;
1370          ix++; if (ix == new_size) ix = 0;
1371       }
1372       /* copy the old entry to this location */
1373       tl_assert(ix < new_size);
1374       tl_assert(new_htab[ix].insn_addr == 0);
1375       new_htab[ix] = *old;
1376       tl_assert(new_htab[ix].insn_addr != 0);
1377    }
1378    /* all entries copied; free old table. */
1379    if (old_htab != &sf->htab_fixed[0])
1380       sg_free(old_htab);
1381    sf->htab = new_htab;
1382    sf->htab_size = new_size;
1383    /* check sf->htab_used is correct.  Optional and a bit expensive
1384       but anyway: */
1385    j = 0;
1386    for (i = 0; i < new_size; i++) {
1387       if (new_htab[i].insn_addr != 0) {
1388          j++;
1389       }
1390    }
1391    tl_assert(j == sf->htab_used);
1392    if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
1393 }
1394 
1395 
1396 __attribute__((noinline))
find_or_create_IInstance_SLOW(StackFrame * sf,Addr ip,XArray * ip_frameblocks)1397 static IInstance* find_or_create_IInstance_SLOW (
1398                      StackFrame* sf,
1399                      Addr ip,
1400                      XArray* /* StackBlock */ ip_frameblocks
1401                   )
1402 {
1403    UWord i, ix;
1404 
1405    stats__htab_searches++;
1406 
1407    tl_assert(sf);
1408    tl_assert(sf->htab);
1409 
1410    /* Make sure the table loading doesn't get too high. */
1411    if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
1412       stats__htab_resizes++;
1413       resize_II_hash_table(sf);
1414    }
1415    tl_assert(2 * sf->htab_used <= sf->htab_size);
1416 
1417    ix = compute_II_hash(ip, sf->htab_size);
1418    i = sf->htab_size;
1419    while (1) {
1420       stats__htab_probes++;
1421       /* Note that because of the way the fast-case handler works,
1422          these two tests are actually redundant in the first iteration
1423          of this loop.  (Except they aren't redundant if the code just
1424          above resized the table first. :-) */
1425       if (sf->htab[ix].insn_addr == ip)
1426          return &sf->htab[ix];
1427       if (sf->htab[ix].insn_addr == 0)
1428          break;
1429       /* If i ever gets to zero and we have found neither what we're
1430          looking for nor an empty slot, the table must be full.  Which
1431          isn't possible -- we monitor the load factor to ensure it
1432          doesn't get above say 50%; if that ever does happen the table
1433          is resized. */
1434       tl_assert(i > 0);
1435       i--;
1436       ix++;
1437       if (ix == sf->htab_size) ix = 0;
1438    }
1439 
1440    /* So now we've found a free slot at ix, and we can use that. */
1441    tl_assert(sf->htab[ix].insn_addr == 0);
1442 
1443    /* Add a new record in this slot. */
1444    tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
1445    sf->htab[ix].insn_addr = ip;
1446    sf->htab[ix].blocks    = ip_frameblocks;
1447    sf->htab[ix].invar.tag = Inv_Unset;
1448    sf->htab_used++;
1449    return &sf->htab[ix];
1450 }


inline
static IInstance* find_or_create_IInstance (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord ix = compute_II_hash(ip, sf->htab_size);
   /* Is it in the first slot we come to? */
   if (LIKELY(sf->htab[ix].insn_addr == ip)) {
      stats__htab_fast++;
      return &sf->htab[ix];
   }
   /* If the first slot we come to is empty, bag it. */
   if (LIKELY(sf->htab[ix].insn_addr == 0)) {
      stats__htab_fast++;
      tl_assert(ip != 0);
      sf->htab[ix].insn_addr = ip;
      sf->htab[ix].blocks    = ip_frameblocks;
      sf->htab[ix].invar.tag = Inv_Unset;
      sf->htab_used++;
      return &sf->htab[ix];
   }
   /* Otherwise we hand off to the slow case, which searches other
      slots, and resizes the table if necessary. */
   return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
}


__attribute__((noinline))
static Addr calculate_StackBlock_EA ( StackBlock* descr,
                                      Addr sp, Addr fp ) {
   UWord w1 = (UWord)descr->base;
   UWord w2 = (UWord)(descr->spRel ? sp : fp);
   UWord ea = w1 + w2;
   return ea;
}
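
/* Worked example with hypothetical numbers: an spRel block with
   base == -16 while SP == 0x7FFF0010 gives
   EA = (UWord)-16 + 0x7FFF0010 = 0x7FFF0000, and an fpRel block with
   base == +8 while FP == 0x7FFF0100 gives EA = 0x7FFF0108.  The
   unsigned addition wraps around correctly for negative offsets. */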

/* Given an array of StackBlocks, return an array of Addrs, holding
   their effective addresses.  Caller deallocates result array. */
__attribute__((noinline))
static XArray* /* Addr */ calculate_StackBlock_EAs (
                             XArray* /* StackBlock */ blocks,
                             Addr sp, Addr fp
                          )
{
   XArray* res;
   Word i, n = VG_(sizeXA)( blocks );
   tl_assert(n > 0);
   res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
   for (i = 0; i < n; i++) {
      StackBlock* blk = VG_(indexXA)( blocks, i );
      Addr ea = calculate_StackBlock_EA( blk, sp, fp );
      VG_(addToXA)( res, &ea );
   }
   return res;
}


/* Try to classify the block into which a memory access falls, and
   write the result in 'inv'.  This writes all relevant fields of
   'inv'. */
__attribute__((noinline))
static void classify_address ( /*OUT*/Invar* inv,
                               ThreadId tid,
                               Addr ea, Addr sp, Addr fp,
                               UWord szB,
                               XArray* /* of StackBlock */ thisInstrBlocks )
{
   tl_assert(szB > 0);
   /* First, look in the stack blocks accessible in this instruction's
      frame. */
   {
     Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
     if (nBlocks == 0) stats__t_i_b_empty++;
     for (i = 0; i < nBlocks; i++) {
        StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
        Addr bea = calculate_StackBlock_EA( descr, sp, fp );
        if (bea <= ea && ea + szB <= bea + descr->szB) {
           /* found it */
           inv->tag = Inv_Stack0;
           inv->Inv.Stack0.addr  = bea;
           inv->Inv.Stack0.szB   = descr->szB;
           inv->Inv.Stack0.descr = descr;
           stats__classify_Stack0++;
           return;
        }
     }
   }
   /* Look in this thread's query cache */
   { Word i;
     QCache* cache = &qcaches[tid];
     static UWord ctr = 0;
     stats__qcache_queries++;
     for (i = 0; i < cache->nInUse; i++) {
        if (0) /* expensive in a loop like this */
               tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
        stats__qcache_probes++;
        if (is_subinterval_of(cache->elems[i].addr,
                              cache->elems[i].szB, ea, szB)) {
           if (i > 0
               && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
              QCElem tmp;
              tmp = cache->elems[i-1];
              cache->elems[i-1] = cache->elems[i];
              cache->elems[i] = tmp;
              i--;
           }
           *inv = cache->elems[i].inv;
           return;
        }
     }
     stats__qcache_misses++;
   }
   /* Ok, so it's not a block in the top frame.  Perhaps it's a block
      in some calling frame?  Consult this thread's stack-block
      interval tree to find out. */
   { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_StackN;
        inv->Inv.StackN.nd = nd;
        stats__classify_StackN++;
        goto out;
     }
   }
   /* Not in a stack block.  Try the global pool. */
   { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_Global;
        inv->Inv.Global.nd = nd;
        stats__classify_Global++;
        goto out;
     }
   }
   /* No idea - give up. */
   inv->tag = Inv_Unknown;
   stats__classify_Unknown++;

   /* Update the cache */
  out:
   { Addr    toadd_addr = 0;
     SizeT   toadd_szB  = 0;
     QCache* cache      = &qcaches[tid];

     static UWord ctr = 0;
     Bool show = False;
     if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;

     if (show) QCache__pp(cache, "before upd");

     switch (inv->tag) {
        case Inv_Global:
           toadd_addr = inv->Inv.Global.nd->addr;
           toadd_szB  = inv->Inv.Global.nd->szB;
           break;
        case Inv_StackN:
           toadd_addr = inv->Inv.StackN.nd->addr;
           toadd_szB  = inv->Inv.StackN.nd->szB;
           break;
        case Inv_Unknown: {
           /* This is more complex.  We need to figure out the
              intersection of the "holes" in the global and stack
              interval trees into which [ea,ea+szB) falls.  This is
              further complicated by the fact that [ea,ea+szB) might
              not fall cleanly into a hole; it may instead fall across
              the boundary of a stack or global block.  In that case
              we just ignore it and don't update the cache, since we
              have no way to represent this situation precisely. */
           StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
           GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
           Addr gMin, gMax, sMin, sMax, uMin, uMax;
           Bool sOK, gOK;
           sNegInf.addr = 0;
           sNegInf.szB  = 1;
           sPosInf.addr = ~(UWord)0;
           sPosInf.szB  = 1;
           gNegInf.addr = 0;
           gNegInf.szB  = 1;
           gPosInf.addr = ~(UWord)0;
           gPosInf.szB  = 1;
           sKey.addr = ea;
           sKey.szB  = szB;
           gKey.addr = ea;
           gKey.szB  = szB;
           if (0) VG_(printf)("Tree sizes %lu %lu\n",
                              VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
           sOK = VG_(findBoundsFM)( siTrees[tid],
                                    (UWord*)&sLB,    NULL/*unused*/,
                                    (UWord*)&sUB,    NULL/*unused*/,
                                    (UWord)&sNegInf, 0/*unused*/,
                                    (UWord)&sPosInf, 0/*unused*/,
                                    (UWord)&sKey );
           gOK = VG_(findBoundsFM)( giTree,
                                    (UWord*)&gLB,    NULL/*unused*/,
                                    (UWord*)&gUB,    NULL/*unused*/,
                                    (UWord)&gNegInf, 0/*unused*/,
                                    (UWord)&gPosInf, 0/*unused*/,
                                    (UWord)&gKey );
           if (!(sOK && gOK)) {
              /* If this happens, then [ea,ea+szB) partially overlaps
                 a global or stack block.  We can't represent that, so
                 just forget it (should be very rare).  However, do
                 maximum sanity checks first.  In such a
                 partial overlap case, it can't be the case that both
                 [ea] and [ea+szB-1] overlap the same block, since if
                 that were indeed the case then it wouldn't be a
                 partial overlap; rather it would simply fall inside
                 that block entirely and we shouldn't be inside this
                 conditional at all. */
              if (!sOK) {
                 StackTreeNode *ndFirst, *ndLast;
                 ndFirst = find_StackTreeNode( siTrees[tid], ea );
                 ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (!gOK) {
                 GlobalTreeNode *ndFirst, *ndLast;
                 ndFirst = find_GlobalTreeNode( giTree, ea );
                 ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (0) VG_(printf)("overlapping blocks in cache\n");
              return;
           }
           sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
           sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
           gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
           gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
           if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
                              sMin, sMax, gMin, gMax);
           /* [sMin,sMax] and [gMin,gMax] must both contain
              [ea,ea+szB) (right?)  That implies they must overlap
              at least over [ea,ea+szB). */
           tl_assert(sMin <= ea && ea+szB-1 <= sMax);
           tl_assert(gMin <= ea && ea+szB-1 <= gMax);
           /* So now compute their intersection. */
           uMin = Addr__max( sMin, gMin );
           uMax = Addr__min( sMax, gMax );
           if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
           tl_assert(uMin <= uMax);
           tl_assert(uMin <= ea && ea+szB-1 <= uMax);
           /* Finally, we can park [uMin,uMax] in the cache.  However,
              if uMax is ~0, we can't represent the difference; hence
              fudge uMax. */
           if (uMin < uMax && uMax == ~(UWord)0)
              uMax--;
           toadd_addr = uMin;
           toadd_szB  = uMax - uMin + 1;
           break;
        }
        default:
           /* We should only be caching info for the above 3 cases */
           tl_assert(0);
     } /* switch (inv->tag) */

     { /* and actually add this to the cache, finally */
       Word i;
       Word ip = cache->nInUse / 2; /* doesn't seem critical */

       if (cache->nInUse < N_QCACHE)
          cache->nInUse++;
       for (i = cache->nInUse-1; i > ip; i--) {
          cache->elems[i] = cache->elems[i-1];
       }

       tl_assert(toadd_szB > 0);
       cache->elems[ip].addr = toadd_addr;
       cache->elems[ip].szB  = toadd_szB;
       cache->elems[ip].inv  = *inv;
     }

     if (show) QCache__pp(cache, "after upd");

   }
}
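
/* Worked example for the Inv_Unknown caching step above, with
   hypothetical addresses.  Suppose the neighbouring stack blocks
   leave the hole [sMin,sMax] = [0x1000,0x2FFF] around ea, and the
   neighbouring global blocks leave [gMin,gMax] = [0x2000,0x4FFF].
   Then
      uMin = Addr__max(0x1000, 0x2000) = 0x2000
      uMax = Addr__min(0x2FFF, 0x4FFF) = 0x2FFF
   so [0x2000,0x2FFF] is parked in the query cache, and any later
   access falling entirely inside it can be classified Inv_Unknown
   without consulting either interval tree. */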


/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__mem_access ( /* Known only at run time: */
                           Addr ea, Addr sp, Addr fp,
                           /* Known at translation time: */
                           Word sszB, Addr ip, XArray* ip_frameBlocks )
{
   UWord szB;
   IInstance* iinstance;
   Invar* inv;
   Invar new_inv;
   ThreadId tid = VG_(get_running_tid)();
   StackFrame* frame;
   HChar bufE[160], bufA[160], bufD[32];

   stats__total_accesses++;

   tl_assert(is_sane_TId(tid));
   frame = shadowStacks[tid];
   tl_assert(frame);

   /* Find the instance info for this instruction. */
   tl_assert(ip_frameBlocks);
   iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   tl_assert(iinstance);
   tl_assert(iinstance->blocks == ip_frameBlocks);

   szB = (sszB < 0) ? (-sszB) : sszB;
   tl_assert(szB > 0);

   inv = &iinstance->invar;

   /* Deal with first uses of instruction instances. */
   if (inv->tag == Inv_Unset) {
      /* This is the first use of this instance of the instruction, so
         we can't make any check; we merely record what we saw, so we
         can compare it against what happens for 2nd and subsequent
         accesses. */
      classify_address( inv,
                        tid, ea, sp, fp, szB, iinstance->blocks );
      tl_assert(inv->tag != Inv_Unset);
      return;
   }

   /* So generate an Invar and see if it's different from what
      we had before. */
   classify_address( &new_inv,
                     tid, ea, sp, fp, szB, iinstance->blocks );
   tl_assert(new_inv.tag != Inv_Unset);

   /* Did we see something different from before?  If no, then there's
      no error. */
   tl_assert(inv->tag != Inv_Unset);

   if (LIKELY(eq_Invar(&new_inv, inv)))
      return;

   VG_(memset)(bufE, 0, sizeof(bufE));
   show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );

   VG_(memset)(bufA, 0, sizeof(bufA));
   show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );

   VG_(memset)(bufD, 0, sizeof(bufD));
   UWord absDelta;
   gen_delta_str( bufD, &absDelta, inv, ea );

   if (absDelta < 1024)
      sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );

   /* And now install the new observation as "standard", so as to
      make future error messages make more sense. */
   *inv = new_inv;
}


////////////////////////////////////////
/* Primary push-a-new-frame routine.  Called indirectly from
   generated code. */

static UWord stats__max_sitree_size = 0;
static UWord stats__max_gitree_size = 0;

static
void shadowStack_new_frame ( ThreadId tid,
                             Addr     sp_at_call_insn,
                             Addr     sp_post_call_insn,
                             Addr     fp_at_call_insn,
                             Addr     ip_post_call_insn,
                             XArray*  descrs_at_call_insn )
{
   StackFrame *callee, *caller;
   tl_assert(is_sane_TId(tid));

   caller = shadowStacks[tid];
   tl_assert(caller);

   if (caller->outer) { /* "this is not the outermost frame" */
      tl_assert(caller->outer->inner == caller);
      tl_assert(caller->outer->depth >= 0);
      tl_assert(1 + caller->outer->depth == caller->depth);
   } else {
      tl_assert(caller->depth == 0);
   }

   caller->sp_at_call = sp_at_call_insn;
   caller->fp_at_call = fp_at_call_insn;

   if (descrs_at_call_insn) {
      tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
      caller->blocks_added_by_call
         = calculate_StackBlock_EAs( descrs_at_call_insn,
                                     sp_at_call_insn, fp_at_call_insn );
      if (caller->blocks_added_by_call)
         add_blocks_to_StackTree( siTrees[tid],
                                  descrs_at_call_insn,
                                  caller->blocks_added_by_call,
                                  caller->depth /* stack depth at which
                                                   these blocks are
                                                   considered to exist */ );
      if (1) {
         UWord s  = VG_(sizeFM)( siTrees[tid] );
         UWord g  = VG_(sizeFM)( giTree );
         Bool  sb = s > stats__max_sitree_size;
         Bool  gb = g > stats__max_gitree_size;
         if (sb) stats__max_sitree_size = s;
         if (gb) stats__max_gitree_size = g;
         if (0 && (sb || gb))
            VG_(message)(Vg_DebugMsg,
                         "exp-sgcheck: new max tree sizes: "
                         "StackTree %lu, GlobalTree %lu\n",
                         stats__max_sitree_size, stats__max_gitree_size );
      }
   } else {
      caller->blocks_added_by_call = NULL;
   }

   /* caller->blocks_added_by_call is used again (and then freed) when
      this frame is removed from the stack. */

   if (caller->inner) {
      callee = caller->inner;
   } else {
      callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
      VG_(memset)(callee, 0, sizeof(StackFrame));
      callee->outer = caller;
      caller->inner = callee;
      callee->depth = 1 + caller->depth;
      tl_assert(callee->inner == NULL);
   }

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( callee );

   callee->creation_sp    = sp_post_call_insn;
   callee->sp_at_call     = 0; // not actually required ..
   callee->fp_at_call     = 0; // .. these 3 initialisations are ..
   callee->blocks_added_by_call = NULL; // .. just for cleanness

   /* record the new running stack frame */
   shadowStacks[tid] = callee;

   /* and this thread's query cache is now invalid */
   QCache__invalidate( &qcaches[tid] );

   if (0)
   { Word d = callee->depth;
     const HChar *fnname;
     Bool ok;
     Addr ip = ip_post_call_insn;
     DiEpoch ep = VG_(current_DiEpoch)();
     ok = VG_(get_fnname_w_offset)( ep, ip, &fnname );
     while (d > 0) {
        VG_(printf)(" ");
        d--;
     }
     VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
   }
}

/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__new_frame ( Addr sp_post_call_insn,
                          Addr fp_at_call_insn,
                          Addr ip_post_call_insn,
                          XArray* blocks_at_call_insn,
                          Word sp_adjust )
{
   ThreadId tid = VG_(get_running_tid)();
   Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
   shadowStack_new_frame( tid,
                          sp_at_call_insn,
                          sp_post_call_insn,
                          fp_at_call_insn,
                          ip_post_call_insn,
                          blocks_at_call_insn );
}


////////////////////////////////////////
/* Primary remove-frame(s) routine.  Called indirectly from
   generated code. */

__attribute__((noinline))
static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
{
   StackFrame *innermost, *innermostOrig;
   tl_assert(is_sane_TId(tid));
   innermost = shadowStacks[tid];
   tl_assert(innermost);
   innermostOrig = innermost;
   //VG_(printf)("UNWIND sp_now = %p\n", sp_now);
   while (1) {
      if (!innermost->outer)
         break;
      if (innermost->inner)
         tl_assert(innermost->inner->outer == innermost);
      tl_assert(innermost->outer->inner == innermost);
      tl_assert(innermost->blocks_added_by_call == NULL);
      if (sp_now <= innermost->creation_sp) break;
      //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
      tl_assert(innermost->htab);
      if (innermost->htab != &innermost->htab_fixed[0])
         sg_free(innermost->htab);
      /* be on the safe side */
      innermost->creation_sp = 0;
      innermost->htab = NULL;
      innermost->htab_size = 0;
      innermost->htab_used = 0;
      innermost->sp_at_call = 0;
      innermost->fp_at_call = 0;
      innermost->blocks_added_by_call = NULL;
      innermost = innermost->outer;

      /* So now we're "back" in the calling frame.  Remove from this
         thread's stack-interval-tree, the blocks added at the time of
         the call. */

      if (innermost->outer) { /* not at the outermost frame */
         if (innermost->blocks_added_by_call != NULL) {
            del_blocks_from_StackTree( siTrees[tid],
                                       innermost->blocks_added_by_call );
            VG_(deleteXA)( innermost->blocks_added_by_call );
            innermost->blocks_added_by_call = NULL;
         }
      }
      /* That completes the required tidying of the interval tree
         associated with the frame we just removed. */

      if (0) {
         Word d = innermost->depth;
         while (d > 0) {
            VG_(printf)(" ");
            d--;
         }
         VG_(printf)("X\n");
      }

   }

   tl_assert(innermost);

   if (innermost != innermostOrig) {
      shadowStacks[tid] = innermost;
      /* this thread's query cache is now invalid */
      QCache__invalidate( &qcaches[tid] );
   }
}
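
/* Worked example of the unwind rule above, with hypothetical
   addresses.  Stacks grow downwards, so a frame with a larger
   creation_sp is older.  Given this shadow stack (innermost first):

      frame C: creation_sp = 0x7FFF1000
      frame B: creation_sp = 0x7FFF2000
      frame A: creation_sp = ~0UL  (root, never removed)

   an unwind to sp_now == 0x7FFF1800 discards C, because
   0x7FFF1800 > 0x7FFF1000, but stops at B, because
   0x7FFF1800 <= 0x7FFF2000.  B becomes the innermost frame, and the
   thread's query cache is invalidated. */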



//////////////////////////////////////////////////////////////
//                                                          //
// Instrumentation                                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* What does instrumentation need to do?

   - at each Call transfer, generate a call to shadowStack_new_frame
     do this by manually inspecting the IR

   - at each sp change, if the sp change is negative,
     call shadowStack_unwind
     do this by asking for SP-change analysis

   - for each memory referencing instruction,
     call helperc__mem_access
*/

/* A complication: sg_ instrumentation and h_ instrumentation need to
   be interleaved.  Since the latter is a lot more complex than the
   former, we split the sg_ instrumentation here into four functions
   and let the h_ instrumenter call the four functions as it goes.
   Hence the h_ instrumenter drives the sg_ instrumenter.

   To make this viable, the sg_ instrumenter carries what running
   state it needs in 'struct _SGEnv'.  This is exported only
   abstractly from this file.  A sketch of the expected calling
   sequence follows the struct definition below. */

struct _SGEnv {
   /* the current insn's IP */
   Addr curr_IP;
   /* whether the above is actually known */
   Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used for
      detecting insns which make more than one memory ref, a situation
      we can't really handle properly; so we ignore all but the
      first ref. */
   Bool firstRef;
   /* READONLY */
   IRTemp (*newIRTemp_cb)(IRType,void*);
   void* newIRTemp_opaque;
};
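
/* A sketch (not compiled) of how a driving instrumenter is expected
   to use the four sg_ entry points below.  The h_ instrumenter's own
   state is elided; 'my_newIRTemp', 'opaque', 'sbIn' and 'sbOut' are
   hypothetical names. */
#if 0
   struct _SGEnv* env = sg_instrument_init( my_newIRTemp, opaque );
   Int i;
   for (i = 0; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      /* sg_ instrumentation must be added before 'st' itself */
      sg_instrument_IRStmt( env, sbOut, st, layout, gWordTy, hWordTy );
      /* ... h_ instrumentation for 'st' would go here ... */
      addStmtToIRSB( sbOut, st );
   }
   sg_instrument_final_jump( env, sbOut, sbIn->next, sbIn->jumpkind,
                             layout, gWordTy, hWordTy );
   sg_instrument_fini( env );
#endif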


/* --- Helper fns for instrumentation --- */

static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
                           IRSB*           bbOut,
                           const VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* sp_expr;
   IRTemp  sp_temp;
   IRType  sp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
   sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
   sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
   return sp_temp;
}

static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
                           IRSB*           bbOut,
                           const VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* fp_expr;
   IRTemp  fp_temp;
   IRType  fp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
   fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
   fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
   return fp_temp;
}

static void instrument_mem_access ( struct _SGEnv* sge,
                                    IRSB*   bbOut,
                                    IRExpr* addr,
                                    Int     szB,
                                    Bool    isStore,
                                    Int     hWordTy_szB,
                                    Addr    curr_IP,
                                    const VexGuestLayout* layout )
{
   IRType  tyAddr      = Ity_INVALID;
   XArray* frameBlocks = NULL;

   tl_assert(isIRAtom(addr));
   tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);

   tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

#if defined(VGA_x86)
   { UChar* p = (UChar*)curr_IP;
     // pop %ebp; RET
     if (p[0] == 0xc3 && p[-1] == 0x5d) return;
     // pop %ebp; RET $imm16
     if (p[0] == 0xc2 && p[-1] == 0x5d) return;
     // PUSH %EBP; mov %esp,%ebp
     if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
   }
#endif

   /* First off, find or create the StackBlocks for this instruction. */
   frameBlocks = get_StackBlocks_for_IP( curr_IP );
   tl_assert(frameBlocks);
   //if (VG_(sizeXA)( frameBlocks ) == 0)
   //   frameBlocks = NULL;

   /* Generate a call to "helperc__mem_access", passing:
         addr current_SP current_FP szB curr_IP frameBlocks
   */
   { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
     IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
     IRExpr** args
        = mkIRExprVec_6( addr,
                         IRExpr_RdTmp(t_SP),
                         IRExpr_RdTmp(t_FP),
                         mkIRExpr_HWord( isStore ? (-szB) : szB ),
                         mkIRExpr_HWord( curr_IP ),
                         mkIRExpr_HWord( (HWord)frameBlocks ) );
     IRDirty* di
        = unsafeIRDirty_0_N( 3/*regparms*/,
                             "helperc__mem_access",
                             VG_(fnptr_to_fnentry)( &helperc__mem_access ),
                             args );

     addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
   }
}
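
/* For reference, the IR emitted by instrument_mem_access for (say) an
   8-byte load at address t3 on a 64-bit guest looks roughly like this
   (offsets and addresses are hypothetical):

      t_SP = GET:I64(<offset_SP>)
      t_FP = GET:I64(<offset_FP>)
      DIRTY ::: helperc__mem_access(t3, t_SP, t_FP,
                                    0x8, <curr_IP>, <frameBlocks>)

   i.e. a dirty helper call carrying the run-time address and SP/FP
   values, plus the translation-time constants: the signed size
   (negated for stores), the instruction address, and the pointer to
   the frame's StackBlock descriptions. */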


/* --- Instrumentation main (4 fns) --- */

struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
                                     void* newIRTemp_opaque )
{
   struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
                                   sizeof(struct _SGEnv));
   tl_assert(env);
   env->curr_IP          = 0;
   env->curr_IP_known    = False;
   env->firstRef         = True;
   env->newIRTemp_cb     = newIRTemp_cb;
   env->newIRTemp_opaque = newIRTemp_opaque;
   return env;
}

void sg_instrument_fini ( struct _SGEnv * env )
{
   sg_free(env);
}

/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
   as required.  This must be called before 'st' itself is added to
   'sbOut'. */
void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
                            /*MOD*/IRSB* sbOut,
                            IRStmt* st,
                            const VexGuestLayout* layout,
                            IRType gWordTy, IRType hWordTy )
{
   if (!sg_clo_enable_sg_checks)
      return;

   tl_assert(st);
   tl_assert(isFlatIRStmt(st));
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_AbiHint:
      case Ist_Put:
      case Ist_PutI:
      case Ist_MBE:
         /* None of these can contain any memory references. */
         break;

      case Ist_Exit:
         tl_assert(st->Ist.Exit.jk != Ijk_Call);
         /* else we must deal with a conditional call */
         break;

      case Ist_IMark:
         env->curr_IP_known = True;
         env->curr_IP       = st->Ist.IMark.addr;
         env->firstRef      = True;
         break;

      case Ist_Store:
         tl_assert(env->curr_IP_known);
         if (env->firstRef) {
            instrument_mem_access(
               env, sbOut,
               st->Ist.Store.addr,
               sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
               True/*isStore*/,
               sizeofIRType(hWordTy),
               env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;

      case Ist_WrTmp: {
         IRExpr* data = st->Ist.WrTmp.data;
         if (data->tag == Iex_Load) {
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               instrument_mem_access(
                  env, sbOut,
                  data->Iex.Load.addr,
                  sizeofIRType(data->Iex.Load.ty),
                  False/*!isStore*/,
                  sizeofIRType(hWordTy),
                  env->curr_IP, layout
               );
               env->firstRef = False;
            }
         }
         break;
      }

      case Ist_Dirty: {
         Int      dataSize;
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None) {
            /* This dirty helper accesses memory.  Collect the
               details. */
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     env, sbOut, d->mAddr, dataSize, True/*isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               env->firstRef = False;
            }
         } else {
            tl_assert(d->mAddr == NULL);
            tl_assert(d->mSize == 0);
         }
         break;
      }

      case Ist_CAS: {
         /* We treat it as a read and a write of the location.  I
            think that is the same behaviour as it was before IRCAS
            was introduced, since prior to that point, the Vex front
            ends would translate a lock-prefixed instruction into a
            (normal) read followed by a (normal) write. */
         if (env->firstRef) {
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            instrument_mem_access(
               env, sbOut, cas->addr, dataSize, False/*!isStore*/,
               sizeofIRType(hWordTy), env->curr_IP, layout
            );
            instrument_mem_access(
               env, sbOut, cas->addr, dataSize, True/*isStore*/,
               sizeofIRType(hWordTy), env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;
      }

      case Ist_LoadG: {
         IRLoadG* lg       = st->Ist.LoadG.details;
         IRType   type     = Ity_INVALID; /* loaded type */
         IRType   typeWide = Ity_INVALID; /* after implicit widening */
         IRExpr*  addr     = lg->addr;
         typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
         tl_assert(type != Ity_INVALID);
         instrument_mem_access(
            env, sbOut, addr, sizeofIRType(type), False/*!isStore*/,
            sizeofIRType(hWordTy), env->curr_IP, layout
         );
         break;
      }

      default:
         tl_assert(0);

   } /* switch (st->tag) */
}


/* Add instrumentation for the final jump of an IRSB 'sbOut', and
   possibly modify 'env' as required.  This must be the last
   instrumentation statement in the block. */
void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
                                /*MOD*/IRSB* sbOut,
                                IRExpr* next,
                                IRJumpKind jumpkind,
                                const VexGuestLayout* layout,
                                IRType gWordTy, IRType hWordTy )
{
   if (!sg_clo_enable_sg_checks)
      return;

   if (jumpkind == Ijk_Call) {
      // Assumes x86 or amd64
      IRTemp   sp_post_call_insn, fp_post_call_insn;
      XArray*  frameBlocks;
      IRExpr** args;
      IRDirty* di;
      sp_post_call_insn
         = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
      fp_post_call_insn
         = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
      tl_assert(env->curr_IP_known);
      frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
      tl_assert(frameBlocks);
      if (VG_(sizeXA)(frameBlocks) == 0)
         frameBlocks = NULL;
      args
         = mkIRExprVec_5(
              IRExpr_RdTmp(sp_post_call_insn),
              IRExpr_RdTmp(fp_post_call_insn),
                          /* assume the call doesn't change FP */
              next,
              mkIRExpr_HWord( (HWord)frameBlocks ),
              mkIRExpr_HWord( sizeofIRType(gWordTy) )
           );
      di = unsafeIRDirty_0_N(
              3/*regparms*/,
              "helperc__new_frame",
              VG_(fnptr_to_fnentry)( &helperc__new_frame ),
              args );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// end Instrumentation                                      //
//                                                          //
//////////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////////
//                                                          //
// misc                                                     //
//                                                          //
//////////////////////////////////////////////////////////////

/* Make a new empty stack frame that is suitable for being the
   outermost frame in a stack.  It has a creation_sp of effectively
   infinity, so it can never be removed. */
static StackFrame* new_root_StackFrame ( void )
{
   StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
   VG_(memset)( sframe, 0, sizeof(*sframe) );
   sframe->creation_sp = ~0UL;

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( sframe );

   /* ->depth, ->outer, ->inner are 0, NULL, NULL */

   return sframe;
}

/* Primary routine for setting up the shadow stack for a new thread.
   Note that this is used to create not only child thread stacks, but
   the root thread's stack too.  We create a new stack with
   .creation_sp set to infinity, so that the outermost frame can never
   be removed (by shadowStack_unwind).  The core calls this function
   as soon as a thread is created.  We cannot yet get its SP value,
   since that may not yet be set. */
static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
{
   tl_assert(is_sane_TId(child));
   if (parent == VG_INVALID_THREADID) {
      /* creating the main thread's stack */
   } else {
      tl_assert(is_sane_TId(parent));
      tl_assert(parent != child);
      tl_assert(shadowStacks[parent] != NULL);
      tl_assert(siTrees[parent] != NULL);
   }

   /* Create the child's stack.  Bear in mind we may be re-using
      it. */
   if (shadowStacks[child] == NULL) {
      /* First use of this stack.  Just allocate an initial frame. */
      tl_assert(siTrees[child] == NULL);
   } else {
      StackFrame *frame, *frame2;
      /* re-using a stack. */
      /* get rid of the interval tree */
      tl_assert(siTrees[child] != NULL);
      delete_StackTree( siTrees[child] );
      siTrees[child] = NULL;
      /* Throw away all existing frames. */
      frame = shadowStacks[child];
      while (frame->outer)
         frame = frame->outer;
      tl_assert(frame->depth == 0);
      while (frame) {
         frame2 = frame->inner;
         if (frame2) tl_assert(1 + frame->depth == frame2->depth);
         sg_free(frame);
         frame = frame2;
      }
      shadowStacks[child] = NULL;
   }

   tl_assert(shadowStacks[child] == NULL);
   tl_assert(siTrees[child] == NULL);

   /* Set up the initial stack frame. */
   shadowStacks[child] = new_root_StackFrame();

   /* and set up the child's stack block interval tree. */
   siTrees[child] = new_StackTree();
}

/* Once a thread is ready to go, the core calls here.  We take the
   opportunity to push a second frame on its stack, with the
   presumably valid SP value that is going to be used for the thread's
   startup.  Hence we should always wind up with a valid outermost
   frame for the thread. */
static void shadowStack_set_initial_SP ( ThreadId tid )
{
   StackFrame* sf;
   tl_assert(is_sane_TId(tid));
   sf = shadowStacks[tid];
   tl_assert(sf != NULL);
   tl_assert(sf->outer == NULL);
   tl_assert(sf->inner == NULL);
   tl_assert(sf->creation_sp == ~0UL);
   shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
                               0, VG_(get_IP)(tid), NULL );
}


//////////////////////////////////////////////////////////////
//                                                          //
// main-ish                                                 //
//                                                          //
//////////////////////////////////////////////////////////////

/* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
   sp-change analysis, as requested in pc_pre_clo_int(). */
void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
   ThreadId  tid = VG_(get_running_tid)();
   shadowStack_unwind( tid, old_SP+len );
}

void sg_pre_clo_init ( void ) {
   ourGlobals_init();
   init_StackBlocks_set();
   init_GlobalBlock_set();
}

void sg_post_clo_init ( void ) {
}

void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
   shadowStack_thread_create(parent, child);
}

void sg_pre_thread_first_insn ( ThreadId tid ) {
   shadowStack_set_initial_SP(tid);
}

void sg_fini(Int exitcode)
{
   if (VG_(clo_stats)) {
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu total accesses, of which:\n", stats__total_accesses);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stack0: %'12llu classify\n",
         stats__classify_Stack0);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stackN: %'12llu classify\n",
         stats__classify_StackN);
      VG_(message)(Vg_DebugMsg,
         " sg_:     global: %'12llu classify\n",
         stats__classify_Global);
      VG_(message)(Vg_DebugMsg,
         " sg_:    unknown: %'12llu classify\n",
         stats__classify_Unknown);
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu Invars preened, of which %'llu changed\n",
         stats__Invars_preened, stats__Invars_changed);
      VG_(message)(Vg_DebugMsg,
         " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
      VG_(message)(Vg_DebugMsg,
         " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses\n",
         stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-fast: %'llu hits\n",
         stats__htab_fast);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
         stats__htab_searches, stats__htab_probes, stats__htab_resizes);
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                                sg_main.c ---*/
/*--------------------------------------------------------------------*/