1 
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2/dwarf3 debug info.             ---*/
5 /*---                                                    storage.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2017 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 /* This file manages the data structures built by the debuginfo
34    system.  These are: the top level SegInfo list.  For each SegInfo,
35    there are tables for address-to-symbol mappings,
36    address-to-src-file/line mappings, and address-to-CFI-info
37    mappings.
38 */
39 
40 #include "pub_core_basics.h"
41 #include "pub_core_options.h"      /* VG_(clo_verbosity) */
42 #include "pub_core_debuginfo.h"
43 #include "pub_core_debuglog.h"
44 #include "pub_core_libcassert.h"
45 #include "pub_core_libcbase.h"
46 #include "pub_core_libcprint.h"
47 #include "pub_core_xarray.h"
48 #include "pub_core_oset.h"
49 #include "pub_core_deduppoolalloc.h"
50 
51 #include "priv_misc.h"         /* dinfo_zalloc/free/strdup */
52 #include "priv_image.h"
53 #include "priv_d3basics.h"     /* ML_(pp_GX) */
54 #include "priv_tytypes.h"
55 #include "priv_storage.h"      /* self */
56 
57 
58 /*------------------------------------------------------------*/
59 /*--- Misc (printing, errors)                              ---*/
60 /*------------------------------------------------------------*/
61 
62 /* Show a non-fatal debug info reading error.  Use VG_(core_panic) for
63    fatal errors.  'serious' errors are shown regardless of the
64    verbosity setting. */
ML_(symerr)65 void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg )
66 {
67    /* XML mode hides everything :-( */
68    if (VG_(clo_xml))
69       return;
70 
71    if (serious) {
72 
73       VG_(message)(Vg_DebugMsg, "WARNING: Serious error when "
74                                 "reading debug info\n");
75       if (True || VG_(clo_verbosity) < 2) {
76          /* Need to show what the file name is, at verbosity levels 2
77             or below, since that won't already have been shown */
78          VG_(message)(Vg_DebugMsg,
79                       "When reading debug info from %s:\n",
80                       (di && di->fsm.filename) ? di->fsm.filename
81                                                : "???");
82       }
83       VG_(message)(Vg_DebugMsg, "%s\n", msg);
84 
85    } else { /* !serious */
86 
87       if (VG_(clo_verbosity) >= 2)
88          VG_(message)(Vg_DebugMsg, "%s\n", msg);
89 
90    }
91 }
92 
93 
94 /* Print a symbol. */
ML_(ppSym)95 void ML_(ppSym) ( Int idx, const DiSym* sym )
96 {
97    const HChar** sec_names = sym->sec_names;
98    vg_assert(sym->pri_name);
99    if (sec_names)
100       vg_assert(sec_names);
101    VG_(printf)( "%5d:  %c%c%c %#8lx .. %#8lx (%u)      %s%s",
102                 idx,
103                 sym->isText ? 'T' : '-',
104                 sym->isIFunc ? 'I' : '-',
105                 sym->isGlobal ? 'G' : '-',
106                 sym->avmas.main,
107                 sym->avmas.main + sym->size - 1, sym->size,
108                 sym->pri_name, sec_names ? " " : "" );
109    if (sec_names) {
110       while (*sec_names) {
111          VG_(printf)("%s%s", *sec_names, *(sec_names+1) ? " " : "");
112          sec_names++;
113       }
114    }
115    VG_(printf)("\n");
116 }
117 
118 /* Print a call-frame-info summary. */
ML_(ppDiCfSI)119 void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs,
120                      Addr base, UInt len,
121                      const DiCfSI_m* si_m )
122 {
123 #  define SHOW_HOW(_how, _off)                   \
124       do {                                       \
125          if (_how == CFIR_UNKNOWN) {             \
126             VG_(printf)("Unknown");              \
127          } else                                  \
128          if (_how == CFIR_SAME) {                \
129             VG_(printf)("Same");                 \
130          } else                                  \
131          if (_how == CFIR_CFAREL) {              \
132             VG_(printf)("cfa+%d", _off);         \
133          } else                                  \
134          if (_how == CFIR_MEMCFAREL) {           \
135             VG_(printf)("*(cfa+%d)", _off);      \
136          } else                                  \
137          if (_how == CFIR_EXPR) {                \
138             VG_(printf)("{");                    \
139             ML_(ppCfiExpr)(exprs, _off);         \
140             VG_(printf)("}");                    \
141          } else {                                \
142             vg_assert(0+0);                      \
143          }                                       \
144       } while (0)
145 
146    if (base != 0 || len != 0)
147       VG_(printf)("[%#lx .. %#lx]: ", base, base + len - 1);
148    else
149       VG_(printf)("[]: ");
150 
151    switch (si_m->cfa_how) {
152       case CFIC_IA_SPREL:
153          VG_(printf)("let cfa=oldSP+%d", si_m->cfa_off);
154          break;
155       case CFIC_IA_BPREL:
156          VG_(printf)("let cfa=oldBP+%d", si_m->cfa_off);
157          break;
158       case CFIC_ARM_R13REL:
159          VG_(printf)("let cfa=oldR13+%d", si_m->cfa_off);
160          break;
161       case CFIC_ARM_R12REL:
162          VG_(printf)("let cfa=oldR12+%d", si_m->cfa_off);
163          break;
164       case CFIC_ARM_R11REL:
165          VG_(printf)("let cfa=oldR11+%d", si_m->cfa_off);
166          break;
167       case CFIR_SAME:
168          VG_(printf)("let cfa=Same");
169          break;
170       case CFIC_ARM_R7REL:
171          VG_(printf)("let cfa=oldR7+%d", si_m->cfa_off);
172          break;
173       case CFIC_ARM64_SPREL:
174          VG_(printf)("let cfa=oldSP+%d", si_m->cfa_off);
175          break;
176       case CFIC_ARM64_X29REL:
177          VG_(printf)("let cfa=oldX29+%d", si_m->cfa_off);
178          break;
179       case CFIC_EXPR:
180          VG_(printf)("let cfa={");
181          ML_(ppCfiExpr)(exprs, si_m->cfa_off);
182          VG_(printf)("}");
183          break;
184       default:
185          vg_assert(0);
186    }
187 
188    VG_(printf)(" in RA=");
189    SHOW_HOW(si_m->ra_how, si_m->ra_off);
190 #  if defined(VGA_x86) || defined(VGA_amd64)
191    VG_(printf)(" SP=");
192    SHOW_HOW(si_m->sp_how, si_m->sp_off);
193    VG_(printf)(" BP=");
194    SHOW_HOW(si_m->bp_how, si_m->bp_off);
195 #  elif defined(VGA_arm)
196    VG_(printf)(" R14=");
197    SHOW_HOW(si_m->r14_how, si_m->r14_off);
198    VG_(printf)(" R13=");
199    SHOW_HOW(si_m->r13_how, si_m->r13_off);
200    VG_(printf)(" R12=");
201    SHOW_HOW(si_m->r12_how, si_m->r12_off);
202    VG_(printf)(" R11=");
203    SHOW_HOW(si_m->r11_how, si_m->r11_off);
204    VG_(printf)(" R7=");
205    SHOW_HOW(si_m->r7_how, si_m->r7_off);
206 #  elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
207 #  elif defined(VGA_s390x) || defined(VGA_mips32) || defined(VGA_mips64)
208    VG_(printf)(" SP=");
209    SHOW_HOW(si_m->sp_how, si_m->sp_off);
210    VG_(printf)(" FP=");
211    SHOW_HOW(si_m->fp_how, si_m->fp_off);
212 #  elif defined(VGA_arm64)
213    VG_(printf)(" SP=");
214    SHOW_HOW(si_m->sp_how, si_m->sp_off);
215    VG_(printf)(" X30=");
216    SHOW_HOW(si_m->x30_how, si_m->x30_off);
217    VG_(printf)(" X29=");
218    SHOW_HOW(si_m->x29_how, si_m->x29_off);
219 #  else
220 #    error "Unknown arch"
221 #  endif
222    VG_(printf)("\n");
223 #  undef SHOW_HOW
224 }
225 
226 
227 /*------------------------------------------------------------*/
228 /*--- Adding stuff                                         ---*/
229 /*------------------------------------------------------------*/
230 
231 /* If not yet in strpool, add a str to the string pool including terminating
232    zero.
233    Return the pointer to the string in strpool.
234 */
ML_(addStr)235 const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len )
236 {
237    if (len == -1) {
238       len = VG_(strlen)(str);
239    } else {
240       vg_assert(len >= 0);
241    }
242    if (UNLIKELY(di->strpool == NULL))
243       di->strpool = VG_(newDedupPA)(SEGINFO_STRPOOLSIZE,
244                                     1,
245                                     ML_(dinfo_zalloc),
246                                     "di.storage.addStr.1",
247                                     ML_(dinfo_free));
248    return VG_(allocEltDedupPA) (di->strpool, len+1, str);
249 }
250 
ML_(addFnDn)251 UInt ML_(addFnDn) (struct _DebugInfo* di,
252                    const HChar* filename,
253                    const HChar* dirname)
254 {
255    FnDn fndn;
256    UInt fndn_ix;
257 
258    if (UNLIKELY(di->fndnpool == NULL))
259       di->fndnpool = VG_(newDedupPA)(500,
260                                      vg_alignof(FnDn),
261                                      ML_(dinfo_zalloc),
262                                      "di.storage.addFnDn.1",
263                                      ML_(dinfo_free));
264    fndn.filename = ML_(addStr)(di, filename, -1);
265    fndn.dirname = dirname ? ML_(addStr)(di, dirname, -1) : NULL;
266    fndn_ix = VG_(allocFixedEltDedupPA) (di->fndnpool, sizeof(FnDn), &fndn);
267    return fndn_ix;
268 }
269 
ML_(fndn_ix2filename)270 const HChar* ML_(fndn_ix2filename) (const DebugInfo* di,
271                                     UInt fndn_ix)
272 {
273    FnDn *fndn;
274    if (fndn_ix == 0)
275       return "???";
276    else {
277       fndn = VG_(indexEltNumber) (di->fndnpool, fndn_ix);
278       return fndn->filename;
279    }
280 }
281 
ML_(fndn_ix2dirname)282 const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di,
283                                    UInt fndn_ix)
284 {
285    FnDn *fndn;
286    if (fndn_ix == 0)
287       return "";
288    else {
289       fndn = VG_(indexEltNumber) (di->fndnpool, fndn_ix);
290       if (fndn->dirname)
291          return fndn->dirname;
292       else
293          return "";
294    }
295 }
296 
297 /* Add a string to the string table of a DebugInfo, by copying the
298    string from the given DiCursor.  Measures the length of the string
299    itself. */
ML_(addStrFromCursor)300 const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c )
301 {
302    /* This is a less-than-stellar implementation, but it should
303       work. */
304    vg_assert(ML_(cur_is_valid)(c));
305    HChar* str = ML_(cur_read_strdup)(c, "di.addStrFromCursor.1");
306    const HChar* res = ML_(addStr)(di, str, -1);
307    ML_(dinfo_free)(str);
308    return res;
309 }
310 
311 
312 /* Add a symbol to the symbol table, by copying *sym.  'sym' may only
313    have one name, so there's no complexities to do with deep vs
314    shallow copying of the sec_name array.  This is checked.
315 */
ML_(addSym)316 void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym )
317 {
318    UInt   new_sz, i;
319    DiSym* new_tab;
320 
321    vg_assert(sym->pri_name != NULL);
322    vg_assert(sym->sec_names == NULL);
323 
324 #if defined(VGO_dragonfly)
325    if (sym->size == 0)
326       sym->size = 1;
327 #endif
328 
329    /* Ignore zero-sized syms. */
330    if (sym->size == 0) return;
331 
332    if (di->symtab_used == di->symtab_size) {
333       new_sz = 2 * di->symtab_size;
334       if (new_sz == 0) new_sz = 500;
335       new_tab = ML_(dinfo_zalloc)( "di.storage.addSym.1",
336                                    new_sz * sizeof(DiSym) );
337       if (di->symtab != NULL) {
338          for (i = 0; i < di->symtab_used; i++)
339             new_tab[i] = di->symtab[i];
340          ML_(dinfo_free)(di->symtab);
341       }
342       di->symtab = new_tab;
343       di->symtab_size = new_sz;
344    }
345 
346    di->symtab[di->symtab_used++] = *sym;
347    vg_assert(di->symtab_used <= di->symtab_size);
348 }
349 
ML_(fndn_ix)350 UInt ML_(fndn_ix) (const DebugInfo* di, Word locno)
351 {
352    UInt fndn_ix;
353 
354    switch(di->sizeof_fndn_ix) {
355       case 1: fndn_ix = ((UChar*)  di->loctab_fndn_ix)[locno]; break;
356       case 2: fndn_ix = ((UShort*) di->loctab_fndn_ix)[locno]; break;
357       case 4: fndn_ix = ((UInt*)   di->loctab_fndn_ix)[locno]; break;
358       default: vg_assert(0);
359    }
360    return fndn_ix;
361 }
362 
set_fndn_ix(struct _DebugInfo * di,Word locno,UInt fndn_ix)363 static inline void set_fndn_ix (struct _DebugInfo* di, Word locno, UInt fndn_ix)
364 {
365    Word i;
366 
367    switch(di->sizeof_fndn_ix) {
368       case 1:
369          if (LIKELY (fndn_ix <= 255)) {
370             ((UChar*) di->loctab_fndn_ix)[locno] = fndn_ix;
371             return;
372          }
373          {
374             UChar* old = (UChar*) di->loctab_fndn_ix;
375             UShort* new = ML_(dinfo_zalloc)( "di.storage.sfix.1",
376                                              di->loctab_size * 2 );
377             for (i = 0; i < di->loctab_used; i++)
378                new[i] = old[i];
379             ML_(dinfo_free)(old);
380             di->sizeof_fndn_ix = 2;
381             di->loctab_fndn_ix = new;
382          }
383          // Fallthrough
384 
385       case 2:
386          if (LIKELY (fndn_ix <= 65535)) {
387             ((UShort*) di->loctab_fndn_ix)[locno] = fndn_ix;
388             return;
389          }
390          {
391             UShort* old = (UShort*) di->loctab_fndn_ix;
392             UInt* new = ML_(dinfo_zalloc)( "di.storage.sfix.2",
393                                            di->loctab_size * 4 );
394             for (i = 0; i < di->loctab_used; i++)
395                new[i] = old[i];
396             ML_(dinfo_free)(old);
397             di->sizeof_fndn_ix = 4;
398             di->loctab_fndn_ix = new;
399          }
400          // Fallthrough
401 
402       case 4:
403          ((UInt*) di->loctab_fndn_ix)[locno] = fndn_ix;
404          return;
405 
406       default: vg_assert(0);
407    }
408 }
409 
410 
411 // Comment the below line to trace LOCTAB merging/canonicalising
412 #define TRACE_LOCTAB_CANON(msg,prev_loc,cur_loc)
413 #ifndef TRACE_LOCTAB_CANON
414 #define TRACE_LOCTAB_CANON(msg,prev_loc,cur_loc)                        \
415    VG_(printf)("%s previous: addr %#lx, size %d, line %d, "             \
416                " current: addr %#lx, size %d, line %d.\n",              \
417                msg,                                                     \
418                (prev_loc)->addr, (prev_loc)->size, (prev_loc)->lineno,  \
419                (cur_loc)->addr, (cur_loc)->size, (cur_loc)->lineno);
420 #endif
421 
422 /* Add a location to the location table.
423 */
addLoc(struct _DebugInfo * di,DiLoc * loc,UInt fndn_ix)424 static void addLoc ( struct _DebugInfo* di, DiLoc* loc, UInt fndn_ix )
425 {
426    /* Zero-sized locs should have been ignored earlier */
427    vg_assert(loc->size > 0);
428 
429    /* Check if the last entry has adjacent range for the same line. */
430    if (di->loctab_used > 0) {
431       DiLoc *previous = &di->loctab[di->loctab_used - 1];
432       if ((previous->lineno == loc->lineno)
433           && (previous->addr + previous->size == loc->addr)) {
434          if (previous->size + loc->size <= MAX_LOC_SIZE) {
435             TRACE_LOCTAB_CANON ("addLoc merging", previous, loc);
436             previous->size += loc->size;
437             return;
438          } else {
439             TRACE_LOCTAB_CANON ("addLoc merging not done (maxsize)",
440                                 previous, loc);
441          }
442       }
443    }
444 
445    if (di->loctab_used == di->loctab_size) {
446       UInt   new_sz;
447       DiLoc* new_loctab;
448       void*  new_loctab_fndn_ix;
449 
450       new_sz = 2 * di->loctab_size;
451       if (new_sz == 0) new_sz = 500;
452       new_loctab = ML_(dinfo_zalloc)( "di.storage.addLoc.1",
453                                       new_sz * sizeof(DiLoc) );
454       if (di->sizeof_fndn_ix == 0)
455          di->sizeof_fndn_ix = 1; // To start with.
456       new_loctab_fndn_ix = ML_(dinfo_zalloc)( "di.storage.addLoc.2",
457                                               new_sz * di->sizeof_fndn_ix );
458       if (di->loctab != NULL) {
459          VG_(memcpy)(new_loctab, di->loctab,
460                      di->loctab_used * sizeof(DiLoc));
461          VG_(memcpy)(new_loctab_fndn_ix, di->loctab_fndn_ix,
462                      di->loctab_used * di->sizeof_fndn_ix);
463          ML_(dinfo_free)(di->loctab);
464          ML_(dinfo_free)(di->loctab_fndn_ix);
465       }
466       di->loctab = new_loctab;
467       di->loctab_fndn_ix = new_loctab_fndn_ix;
468       di->loctab_size = new_sz;
469    }
470 
471    di->loctab[di->loctab_used] = *loc;
472    set_fndn_ix (di, di->loctab_used, fndn_ix);
473    di->loctab_used++;
474    vg_assert(di->loctab_used <= di->loctab_size);
475 }
476 
477 
478 /* Resize the LocTab (line number table) to save memory, by removing
479    (and, potentially, allowing m_mallocfree to unmap) any unused space
480    at the end of the table. */
shrinkLocTab(struct _DebugInfo * di)481 static void shrinkLocTab ( struct _DebugInfo* di )
482 {
483    UWord new_sz = di->loctab_used;
484    if (new_sz == di->loctab_size) return;
485    vg_assert(new_sz < di->loctab_size);
486    ML_(dinfo_shrink_block)( di->loctab, new_sz * sizeof(DiLoc));
487    ML_(dinfo_shrink_block)( di->loctab_fndn_ix, new_sz * di->sizeof_fndn_ix);
488    di->loctab_size = new_sz;
489 }
490 
491 #define COMPLAIN_ONCE(what, limit, limit_op)                   \
492    {                                                           \
493    static Bool complained = False;                             \
494    if (!complained) {                                          \
495       complained = True;                                       \
496       VG_(message)(Vg_UserMsg,                                 \
497                    "warning: Can't handle " what " with "      \
498                    "line number %d " limit_op " than %d\n",    \
499                    lineno, limit);                             \
500       VG_(message)(Vg_UserMsg,                                 \
501                    "(Nb: this message is only shown once)\n"); \
502    } \
503 }
504 
505 
506 /* Top-level place to call to add a source-location mapping entry.
507 */
ML_(addLineInfo)508 void ML_(addLineInfo) ( struct _DebugInfo* di,
509                         UInt     fndn_ix,
510                         Addr     this,
511                         Addr     next,
512                         Int      lineno,
513                         Int      entry /* only needed for debug printing */
514      )
515 {
516    static const Bool debug = False;
517    DiLoc loc;
518    UWord size = next - this;
519 
520    /* Ignore zero-sized locs */
521    if (this == next) return;
522 
523    if (debug) {
524       FnDn *fndn = VG_(indexEltNumber) (di->fndnpool, fndn_ix);
525       VG_(printf)( "  src ix %u %s %s line %d %#lx-%#lx\n",
526                    fndn_ix,
527                    fndn->dirname ? fndn->dirname : "(unknown)",
528                    fndn->filename, lineno, this, next );
529    }
530 
531    /* Maximum sanity checking.  Some versions of GNU as do a shabby
532     * job with stabs entries; if anything looks suspicious, revert to
533     * a size of 1.  This should catch the instruction of interest
534     * (since if using asm-level debug info, one instruction will
535     * correspond to one line, unlike with C-level debug info where
536     * multiple instructions can map to the one line), but avoid
537     * catching any other instructions bogusly. */
538    if (this > next) {
539        if (VG_(clo_verbosity) > 2) {
540            VG_(message)(Vg_DebugMsg,
541                         "warning: line info addresses out of order "
542                         "at entry %d: 0x%lx 0x%lx\n", entry, this, next);
543        }
544        size = 1;
545    }
546 
547    if (size > MAX_LOC_SIZE) {
548        if (0)
549        VG_(message)(Vg_DebugMsg,
550                     "warning: line info address range too large "
551                     "at entry %d: %lu\n", entry, size);
552        size = 1;
553    }
554 
555    /* At this point, we know that the original value for |size|, viz
556       |next - this|, will only still be used in the case where
557       |this| <u |next|, so it can't have underflowed.  Considering
558       that and the three checks that follow it, the following must
559       hold. */
560    vg_assert(size >= 1);
561    vg_assert(size <= MAX_LOC_SIZE);
562 
563    /* Rule out ones which are completely outside the r-x mapped area.
564       See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
565       for background and rationale. */
566    vg_assert(di->fsm.have_rx_map && di->fsm.have_rw_map);
567    if (ML_(find_rx_mapping)(di, this, this + size - 1) == NULL) {
568        if (0)
569           VG_(message)(Vg_DebugMsg,
570                        "warning: ignoring line info entry falling "
571                        "outside current DebugInfo: %#lx %#lx %#lx %#lx\n",
572                        di->text_avma,
573                        di->text_avma + di->text_size,
574                        this, this + size - 1);
575        return;
576    }
577 
578    if (lineno < 0) {
579       COMPLAIN_ONCE("line info entry", 0, "smaller");
580       return;
581    }
582    if (lineno > MAX_LINENO) {
583       COMPLAIN_ONCE("line info entry", MAX_LINENO, "greater");
584       return;
585    }
586 
587    loc.addr      = this;
588    loc.size      = (UShort)size;
589    loc.lineno    = lineno;
590 
591    if (0) VG_(message)(Vg_DebugMsg,
592 		       "addLoc: addr %#lx, size %lu, line %d, fndn_ix %u\n",
593 		       this,size,lineno,fndn_ix);
594 
595    addLoc ( di, &loc, fndn_ix );
596 }
597 
598 /* Add an inlined call info to the inlined call table.
599 */
addInl(struct _DebugInfo * di,DiInlLoc * inl)600 static void addInl ( struct _DebugInfo* di, DiInlLoc* inl )
601 {
602    UInt   new_sz, i;
603    DiInlLoc* new_tab;
604 
605    /* empty inl should have been ignored earlier */
606    vg_assert(inl->addr_lo < inl->addr_hi);
607 
608    if (di->inltab_used == di->inltab_size) {
609       new_sz = 2 * di->inltab_size;
610       if (new_sz == 0) new_sz = 500;
611       new_tab = ML_(dinfo_zalloc)( "di.storage.addInl.1",
612                                    new_sz * sizeof(DiInlLoc) );
613       if (di->inltab != NULL) {
614          for (i = 0; i < di->inltab_used; i++)
615             new_tab[i] = di->inltab[i];
616          ML_(dinfo_free)(di->inltab);
617       }
618       di->inltab = new_tab;
619       di->inltab_size = new_sz;
620    }
621 
622    di->inltab[di->inltab_used] = *inl;
623    if (inl->addr_hi - inl->addr_lo > di->maxinl_codesz)
624       di->maxinl_codesz = inl->addr_hi - inl->addr_lo;
625    di->inltab_used++;
626    vg_assert(di->inltab_used <= di->inltab_size);
627 }
628 
629 
630 /* Resize the InlTab (inlined call table) to save memory, by removing
631    (and, potentially, allowing m_mallocfree to unmap) any unused space
632    at the end of the table. */
shrinkInlTab(struct _DebugInfo * di)633 static void shrinkInlTab ( struct _DebugInfo* di )
634 {
635    UWord new_sz = di->inltab_used;
636    if (new_sz == di->inltab_size) return;
637    vg_assert(new_sz < di->inltab_size);
638    ML_(dinfo_shrink_block)( di->inltab, new_sz * sizeof(DiInlLoc));
639    di->inltab_size = new_sz;
640 }
641 
642 /* Top-level place to call to add a addr-to-inlined fn info. */
ML_(addInlInfo)643 void ML_(addInlInfo) ( struct _DebugInfo* di,
644                        Addr addr_lo, Addr addr_hi,
645                        const HChar* inlinedfn,
646                        UInt fndn_ix,
647                        Int lineno, UShort level)
648 {
649    DiInlLoc inl;
650 
651    /* Similar paranoia as in ML_(addLineInfo). Unclear if needed. */
652    if (addr_lo >= addr_hi) {
653        if (VG_(clo_verbosity) > 2) {
654            VG_(message)(Vg_DebugMsg,
655                         "warning: inlined info addresses out of order "
656                         "at: 0x%lx 0x%lx\n", addr_lo, addr_hi);
657        }
658        addr_hi = addr_lo + 1;
659    }
660 
661    if (lineno < 0) {
662       COMPLAIN_ONCE ("inlined call info entry", 0, "smaller");
663       return;
664    }
665    if (lineno > MAX_LINENO) {
666       COMPLAIN_ONCE ("inlined call info entry", MAX_LINENO, "greater");
667       return;
668    }
669 
670    // code resulting from inlining of inlinedfn:
671    inl.addr_lo   = addr_lo;
672    inl.addr_hi   = addr_hi;
673    inl.inlinedfn = inlinedfn;
674    // caller:
675    inl.fndn_ix   = fndn_ix;
676    inl.lineno    = lineno;
677    inl.level     = level;
678 
679    if (0) VG_(message)
680              (Vg_DebugMsg,
681               "addInlInfo: fn %s inlined as addr_lo %#lx,addr_hi %#lx,"
682               "caller fndn_ix %u %s:%d\n",
683               inlinedfn, addr_lo, addr_hi, fndn_ix,
684               ML_(fndn_ix2filename) (di, fndn_ix), lineno);
685 
686    addInl ( di, &inl );
687 }
688 
ML_(get_cfsi_m)689 DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos)
690 {
691    UInt cfsi_m_ix;
692 
693    vg_assert(pos >= 0 && pos < di->cfsi_used);
694    switch (di->sizeof_cfsi_m_ix) {
695       case 1: cfsi_m_ix = ((UChar*)  di->cfsi_m_ix)[pos]; break;
696       case 2: cfsi_m_ix = ((UShort*) di->cfsi_m_ix)[pos]; break;
697       case 4: cfsi_m_ix = ((UInt*)   di->cfsi_m_ix)[pos]; break;
698       default: vg_assert(0);
699    }
700    if (cfsi_m_ix == 0)
701       return NULL; // cfi hole
702    else
703       return VG_(indexEltNumber) (di->cfsi_m_pool, cfsi_m_ix);
704 }
705 
706 /* Top-level place to call to add a CFI summary record.  The supplied
707    DiCfSI_m is copied. */
ML_(addDiCfSI)708 void ML_(addDiCfSI) ( struct _DebugInfo* di,
709                       Addr base, UInt len, DiCfSI_m* cfsi_m )
710 {
711    static const Bool debug = False;
712    UInt    new_sz;
713    DiCfSI* new_tab;
714    SSizeT  delta;
715    DebugInfoMapping* map;
716    DebugInfoMapping* map2;
717 
718    if (debug) {
719       VG_(printf)("adding DiCfSI: ");
720       ML_(ppDiCfSI)(di->cfsi_exprs, base, len, cfsi_m);
721    }
722 
723    /* sanity */
724    vg_assert(len > 0);
725    /* Issue a warning if LEN is unexpectedly large (exceeds 5 million).
726       The implication is you have a single procedure
727       with more than 5 million bytes of code.  Which is pretty
728       unlikely.  Either that, or the debuginfo reader is somehow
729       broken.  5 million is of course arbitrary; but it's big enough
730       to be bigger than the size of any plausible piece of code that
731       would fall within a single procedure. But occasionally it does
732       happen (c.f. BZ #339542). */
733    if (len >= 5000000)
734       VG_(message)(Vg_DebugMsg,
735                    "warning: DiCfSI %#lx .. %#lx is huge; length = %u (%s)\n",
736                    base, base + len - 1, len, di->soname);
737 
738    vg_assert(di->fsm.have_rx_map && di->fsm.have_rw_map);
739    /* Find mapping where at least one end of the CFSI falls into. */
740    map  = ML_(find_rx_mapping)(di, base, base);
741    map2 = ML_(find_rx_mapping)(di, base + len - 1,
742                                    base + len - 1);
743    if (map == NULL)
744       map = map2;
745    else if (map2 == NULL)
746       map2 = map;
747 
748    /* Rule out ones which are completely outside the r-x mapped area
749       (or which span across different areas).
750       See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
751       for background and rationale. */
752    if (map == NULL || map != map2) {
753       static Int complaints = 10;
754       if (VG_(clo_trace_cfi) || complaints > 0) {
755          complaints--;
756          if (VG_(clo_verbosity) > 1) {
757             VG_(message)(
758                Vg_DebugMsg,
759                "warning: DiCfSI %#lx .. %#lx outside mapped rx segments (%s)\n",
760                base,
761                base + len - 1,
762                di->soname
763             );
764          }
765          if (VG_(clo_trace_cfi))
766             ML_(ppDiCfSI)(di->cfsi_exprs, base, len, cfsi_m);
767       }
768       return;
769    }
770 
771    /* Now we know the range is at least partially inside the r-x
772       mapped area.  That implies that at least one of the ends of the
773       range falls inside the area.  If necessary, clip it so it is
774       completely within the area.  If we don't do this,
775       check_CFSI_related_invariants() in debuginfo.c (invariant #2)
776       will fail.  See
777       "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
778       priv_storage.h for background. */
779    if (base < map->avma) {
780       /* Lower end is outside the mapped area.  Hence upper end must
781          be inside it. */
782       if (0) VG_(printf)("XXX truncate lower\n");
783       vg_assert(base + len - 1 >= map->avma);
784       delta = (SSizeT)(map->avma - base);
785       vg_assert(delta > 0);
786       vg_assert(delta < (SSizeT)len);
787       base += delta;
788       len -= delta;
789    }
790    else
791    if (base + len - 1 > map->avma + map->size - 1) {
792       /* Upper end is outside the mapped area.  Hence lower end must be
793          inside it. */
794       if (0) VG_(printf)("XXX truncate upper\n");
795       vg_assert(base <= map->avma + map->size - 1);
796       delta = (SSizeT)( (base + len - 1)
797                         - (map->avma + map->size - 1) );
798       vg_assert(delta > 0);
799       vg_assert(delta < (SSizeT)len);
800       len -= delta;
801    }
802 
803    /* Final checks */
804 
805    /* Because: either cfsi was entirely inside the range, in which
806       case we asserted that len > 0 at the start, OR it fell partially
807       inside the range, in which case we reduced it by some size
808       (delta) which is < its original size. */
809    vg_assert(len > 0);
810 
811    /* Similar logic applies for the next two assertions. */
812    vg_assert(base >= map->avma);
813    vg_assert(base + len - 1
814              <= map->avma + map->size - 1);
815 
816    if (di->cfsi_used == di->cfsi_size) {
817       new_sz = 2 * di->cfsi_size;
818       if (new_sz == 0) new_sz = 20;
819       new_tab = ML_(dinfo_zalloc)( "di.storage.addDiCfSI.1",
820                                    new_sz * sizeof(DiCfSI) );
821       if (di->cfsi_rd != NULL) {
822          VG_(memcpy)(new_tab, di->cfsi_rd,
823                      di->cfsi_used * sizeof(DiCfSI));
824          ML_(dinfo_free)(di->cfsi_rd);
825       }
826       di->cfsi_rd = new_tab;
827       di->cfsi_size = new_sz;
828       if (di->cfsi_m_pool == NULL)
829          di->cfsi_m_pool = VG_(newDedupPA)(1000 * sizeof(DiCfSI_m),
830                                            vg_alignof(DiCfSI_m),
831                                            ML_(dinfo_zalloc),
832                                            "di.storage.DiCfSI_m_pool",
833                                            ML_(dinfo_free));
834    }
835 
836    di->cfsi_rd[di->cfsi_used].base = base;
837    di->cfsi_rd[di->cfsi_used].len  = len;
838    di->cfsi_rd[di->cfsi_used].cfsi_m_ix
839       = VG_(allocFixedEltDedupPA)(di->cfsi_m_pool,
840                                   sizeof(DiCfSI_m),
841                                   cfsi_m);
842    di->cfsi_used++;
843    vg_assert(di->cfsi_used <= di->cfsi_size);
844 }
845 
846 
ML_(CfiExpr_Undef)847 Int ML_(CfiExpr_Undef)( XArray* dst )
848 {
849    CfiExpr e;
850    VG_(memset)( &e, 0, sizeof(e) );
851    e.tag = Cex_Undef;
852    return (Int)VG_(addToXA)( dst, &e );
853 }
ML_(CfiExpr_Deref)854 Int ML_(CfiExpr_Deref)( XArray* dst, Int ixAddr )
855 {
856    CfiExpr e;
857    VG_(memset)( &e, 0, sizeof(e) );
858    e.tag = Cex_Deref;
859    e.Cex.Deref.ixAddr = ixAddr;
860    return (Int)VG_(addToXA)( dst, &e );
861 }
ML_(CfiExpr_Const)862 Int ML_(CfiExpr_Const)( XArray* dst, UWord con )
863 {
864    CfiExpr e;
865    VG_(memset)( &e, 0, sizeof(e) );
866    e.tag = Cex_Const;
867    e.Cex.Const.con = con;
868    return (Int)VG_(addToXA)( dst, &e );
869 }
ML_(CfiExpr_Unop)870 Int ML_(CfiExpr_Unop)( XArray* dst, CfiUnop op, Int ix )
871 {
872    CfiExpr e;
873    VG_(memset)( &e, 0, sizeof(e) );
874    e.tag = Cex_Unop;
875    e.Cex.Unop.op  = op;
876    e.Cex.Unop.ix = ix;
877    return (Int)VG_(addToXA)( dst, &e );
878 }
ML_(CfiExpr_Binop)879 Int ML_(CfiExpr_Binop)( XArray* dst, CfiBinop op, Int ixL, Int ixR )
880 {
881    CfiExpr e;
882    VG_(memset)( &e, 0, sizeof(e) );
883    e.tag = Cex_Binop;
884    e.Cex.Binop.op  = op;
885    e.Cex.Binop.ixL = ixL;
886    e.Cex.Binop.ixR = ixR;
887    return (Int)VG_(addToXA)( dst, &e );
888 }
ML_(CfiExpr_CfiReg)889 Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg )
890 {
891    CfiExpr e;
892    VG_(memset)( &e, 0, sizeof(e) );
893    e.tag = Cex_CfiReg;
894    e.Cex.CfiReg.reg = reg;
895    return (Int)VG_(addToXA)( dst, &e );
896 }
ML_(CfiExpr_DwReg)897 Int ML_(CfiExpr_DwReg)( XArray* dst, Int reg )
898 {
899    CfiExpr e;
900    VG_(memset)( &e, 0, sizeof(e) );
901    e.tag = Cex_DwReg;
902    e.Cex.DwReg.reg = reg;
903    return (Int)VG_(addToXA)( dst, &e );
904 }
905 
ppCfiUnop(CfiUnop op)906 static void ppCfiUnop ( CfiUnop op )
907 {
908    switch (op) {
909       case Cunop_Abs: VG_(printf)("abs"); break;
910       case Cunop_Neg: VG_(printf)("-"); break;
911       case Cunop_Not: VG_(printf)("~"); break;
912       default:        vg_assert(0);
913    }
914 }
915 
ppCfiBinop(CfiBinop op)916 static void ppCfiBinop ( CfiBinop op )
917 {
918    switch (op) {
919       case Cbinop_Add: VG_(printf)("+"); break;
920       case Cbinop_Sub: VG_(printf)("-"); break;
921       case Cbinop_And: VG_(printf)("&"); break;
922       case Cbinop_Mul: VG_(printf)("*"); break;
923       case Cbinop_Shl: VG_(printf)("<<"); break;
924       case Cbinop_Shr: VG_(printf)(">>"); break;
925       case Cbinop_Eq:  VG_(printf)("=="); break;
926       case Cbinop_Ge:  VG_(printf)(">="); break;
927       case Cbinop_Gt:  VG_(printf)(">"); break;
928       case Cbinop_Le:  VG_(printf)("<="); break;
929       case Cbinop_Lt:  VG_(printf)("<"); break;
930       case Cbinop_Ne:  VG_(printf)("!="); break;
931       default:         vg_assert(0);
932    }
933 }
934 
ppCfiReg(CfiReg reg)935 static void ppCfiReg ( CfiReg reg )
936 {
937    switch (reg) {
938       case Creg_INVALID:   VG_(printf)("Creg_INVALID"); break;
939       case Creg_IA_SP:     VG_(printf)("xSP"); break;
940       case Creg_IA_BP:     VG_(printf)("xBP"); break;
941       case Creg_IA_IP:     VG_(printf)("xIP"); break;
942       case Creg_ARM_R13:   VG_(printf)("R13"); break;
943       case Creg_ARM_R12:   VG_(printf)("R12"); break;
944       case Creg_ARM_R15:   VG_(printf)("R15"); break;
945       case Creg_ARM_R14:   VG_(printf)("R14"); break;
946       case Creg_ARM_R7:    VG_(printf)("R7");  break;
947       case Creg_ARM64_X30: VG_(printf)("X30"); break;
948       case Creg_MIPS_RA:   VG_(printf)("RA"); break;
949       case Creg_S390_IA:   VG_(printf)("IA"); break;
950       case Creg_S390_SP:   VG_(printf)("SP"); break;
951       case Creg_S390_FP:   VG_(printf)("FP"); break;
952       case Creg_S390_LR:   VG_(printf)("LR"); break;
953       default: vg_assert(0);
954    }
955 }
956 
ML_(ppCfiExpr)957 void ML_(ppCfiExpr)( const XArray* src, Int ix )
958 {
959    /* VG_(indexXA) checks for invalid src/ix values, so we can
960       use it indiscriminately. */
961    const CfiExpr* e = VG_(indexXA)( src, ix );
962    switch (e->tag) {
963       case Cex_Undef:
964          VG_(printf)("Undef");
965          break;
966       case Cex_Deref:
967          VG_(printf)("*(");
968          ML_(ppCfiExpr)(src, e->Cex.Deref.ixAddr);
969          VG_(printf)(")");
970          break;
971       case Cex_Const:
972          VG_(printf)("0x%lx", e->Cex.Const.con);
973          break;
974       case Cex_Unop:
975          ppCfiUnop(e->Cex.Unop.op);
976          VG_(printf)("(");
977          ML_(ppCfiExpr)(src, e->Cex.Unop.ix);
978          VG_(printf)(")");
979          break;
980       case Cex_Binop:
981          VG_(printf)("(");
982          ML_(ppCfiExpr)(src, e->Cex.Binop.ixL);
983          VG_(printf)(")");
984          ppCfiBinop(e->Cex.Binop.op);
985          VG_(printf)("(");
986          ML_(ppCfiExpr)(src, e->Cex.Binop.ixR);
987          VG_(printf)(")");
988          break;
989       case Cex_CfiReg:
990          ppCfiReg(e->Cex.CfiReg.reg);
991          break;
992       case Cex_DwReg:
993          VG_(printf)("dwr%d", e->Cex.DwReg.reg);
994          break;
995       default:
996          VG_(core_panic)("ML_(ppCfiExpr)");
997          /*NOTREACHED*/
998          break;
999    }
1000 }
1001 
1002 
ML_(cmp_for_DiAddrRange_range)1003 Word ML_(cmp_for_DiAddrRange_range) ( const void* keyV,
1004                                       const void* elemV ) {
1005    const Addr* key = (const Addr*)keyV;
1006    const DiAddrRange* elem = (const DiAddrRange*)elemV;
1007    if (0)
1008       VG_(printf)("cmp_for_DiAddrRange_range: %#lx vs %#lx\n",
1009                   *key, elem->aMin);
1010    if ((*key) < elem->aMin) return -1;
1011    if ((*key) > elem->aMax) return 1;
1012    return 0;
1013 }
1014 
1015 static
show_scope(OSet * scope,const HChar * who)1016 void show_scope ( OSet* /* of DiAddrRange */ scope, const HChar* who )
1017 {
1018    DiAddrRange* range;
1019    VG_(printf)("Scope \"%s\" = {\n", who);
1020    VG_(OSetGen_ResetIter)( scope );
1021    while (True) {
1022       range = VG_(OSetGen_Next)( scope );
1023       if (!range) break;
1024       VG_(printf)("   %#lx .. %#lx: %ld vars\n", range->aMin, range->aMax,
1025                   range->vars ? VG_(sizeXA)(range->vars) : 0);
1026    }
1027    VG_(printf)("}\n");
1028 }
1029 
1030 /* Add the variable 'var' to 'scope' for the address range [aMin,aMax]
1031    (inclusive of aMin and aMax).  Split existing ranges as required if
1032    aMin or aMax or both don't match existing range boundaries, and add
1033    'var' to all required ranges.  Take great care to preserve the
1034    invariant that the ranges in 'scope' cover the entire address range
1035    exactly once, with no overlaps and no holes. */
add_var_to_arange(OSet * scope,Addr aMin,Addr aMax,DiVariable * var)1036 static void add_var_to_arange (
1037                /*MOD*/OSet* /* of DiAddrRange */ scope,
1038                Addr aMin,
1039                Addr aMax,
1040                DiVariable* var
1041             )
1042 {
1043    DiAddrRange *first, *last, *range;
1044    /* These xx variables are for assertion checking only; they don't
1045       contribute anything to the actual work of this function. */
1046    DiAddrRange *xxRangep, *xxFirst, *xxLast;
1047    UWord       xxIters;
1048 
1049    vg_assert(aMin <= aMax);
1050 
1051    if (0) VG_(printf)("add_var_to_arange: %#lx .. %#lx\n", aMin, aMax);
1052    if (0) show_scope( scope, "add_var_to_arange(1)" );
1053 
1054    /* See if the lower end of the range (aMin) falls exactly on an
1055       existing range boundary.  If not, find the range it does fall
1056       into, and split it (copying the variables in the process), so
1057       that aMin does exactly fall on a range boundary. */
1058    first = VG_(OSetGen_Lookup)( scope, &aMin );
1059    /* It must be present, since the presented OSet must cover
1060       the entire address range. */
1061    vg_assert(first);
1062    vg_assert(first->aMin <= first->aMax);
1063    vg_assert(first->aMin <= aMin && aMin <= first->aMax);
1064 
1065    /* Fast track common case, which is that the range specified for
1066       the variable exactly coincides with one already-existing
1067       range. */
1068    if (first->aMin == aMin && first->aMax == aMax) {
1069       vg_assert(first->vars);
1070       VG_(addToXA)( first->vars, var );
1071       return;
1072    }
1073 
1074    /* We have to get into splitting ranges, which is complex
1075       and slow. */
1076    if (first->aMin < aMin) {
1077       DiAddrRange* nyu;
1078       /* Ok.  We'll have to split 'first'. */
1079       /* truncate the upper end of 'first' */
1080       Addr tmp = first->aMax;
1081       first->aMax = aMin-1;
1082       vg_assert(first->aMin <= first->aMax);
1083       /* create a new range */
1084       nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
1085       nyu->aMin = aMin;
1086       nyu->aMax = tmp;
1087       vg_assert(nyu->aMin <= nyu->aMax);
1088       /* copy vars into it */
1089       vg_assert(first->vars);
1090       nyu->vars = VG_(cloneXA)( "di.storage.avta.1", first->vars );
1091       VG_(OSetGen_Insert)( scope, nyu );
1092       first = nyu;
1093    }
1094 
1095    vg_assert(first->aMin == aMin);
1096 
1097    /* Now do exactly the same for the upper end (aMax): if it doesn't
1098       fall on a boundary, cause it to do so by splitting the range it
1099       does currently fall into. */
1100    last = VG_(OSetGen_Lookup)( scope, &aMax );
1101    vg_assert(last->aMin <= last->aMax);
1102    vg_assert(last->aMin <= aMax && aMax <= last->aMax);
1103 
1104    if (aMax < last->aMax) {
1105       DiAddrRange* nyu;
1106       /* We have to split 'last'. */
1107       /* truncate the lower end of 'last' */
1108       Addr tmp = last->aMin;
1109       last->aMin = aMax+1;
1110       vg_assert(last->aMin <= last->aMax);
1111       /* create a new range */
1112       nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
1113       nyu->aMin = tmp;
1114       nyu->aMax = aMax;
1115       vg_assert(nyu->aMin <= nyu->aMax);
1116       /* copy vars into it */
1117       vg_assert(last->vars);
1118       nyu->vars = VG_(cloneXA)( "di.storage.avta.2", last->vars );
1119       VG_(OSetGen_Insert)( scope, nyu );
1120       last = nyu;
1121    }
1122 
1123    vg_assert(aMax == last->aMax);
1124 
1125    xxFirst = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMin);
1126    xxLast  = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMax);
1127    vg_assert(xxFirst);
1128    vg_assert(xxLast);
1129    vg_assert(xxFirst->aMin == aMin);
1130    vg_assert(xxLast->aMax == aMax);
1131    if (xxFirst != xxLast)
1132       vg_assert(xxFirst->aMax < xxLast->aMin);
1133 
1134    /* Great.  Now we merely need to iterate over the segments from
1135       'first' to 'last' inclusive, and add 'var' to the variable set
1136       of each of them. */
1137    if (0) {
1138       static UWord ctr = 0;
1139       ctr++;
1140       VG_(printf)("ctr = %lu\n", ctr);
1141       if (ctr >= 33263) show_scope( scope, "add_var_to_arange(2)" );
1142    }
1143 
1144    xxIters = 0;
1145    range = xxRangep = NULL;
1146    VG_(OSetGen_ResetIterAt)( scope, &aMin );
1147    while (True) {
1148       xxRangep = range;
1149       range    = VG_(OSetGen_Next)( scope );
1150       if (!range) break;
1151       if (range->aMin > aMax) break;
1152       xxIters++;
1153       if (0) VG_(printf)("have range %#lx %#lx\n",
1154                          range->aMin, range->aMax);
1155 
1156       /* Sanity checks */
1157       if (!xxRangep) {
1158          /* This is the first in the range */
1159          vg_assert(range->aMin == aMin);
1160       } else {
1161          vg_assert(xxRangep->aMax + 1 == range->aMin);
1162       }
1163 
1164       vg_assert(range->vars);
1165       VG_(addToXA)( range->vars, var );
1166    }
1167    /* Done.  We should have seen at least one range. */
1168    vg_assert(xxIters >= 1);
1169    if (xxIters == 1) vg_assert(xxFirst == xxLast);
1170    if (xxFirst == xxLast) vg_assert(xxIters == 1);
1171    vg_assert(xxRangep);
1172    vg_assert(xxRangep->aMax == aMax);
1173    vg_assert(xxRangep == xxLast);
1174 }
1175 
1176 
1177 /* Top-level place to call to add a variable description (as extracted
1178    from a DWARF3 .debug_info section. */
ML_(addVar)1179 void ML_(addVar)( struct _DebugInfo* di,
1180                   Int    level,
1181                   Addr   aMin,
1182                   Addr   aMax,
1183                   const  HChar* name, /* in di's .strpool */
1184                   UWord  typeR, /* a cuOff */
1185                   const GExpr* gexpr,
1186                   const GExpr* fbGX,
1187                   UInt   fndn_ix, /* where decl'd - may be zero.
1188                                      index in in di's .fndnpool */
1189                   Int    lineNo, /* where decl'd - may be zero */
1190                   Bool   show )
1191 {
1192    OSet* /* of DiAddrRange */ scope;
1193    DiVariable var;
1194    Bool       all;
1195    TyEnt*     ent;
1196    MaybeULong mul;
1197    const HChar* badness;
1198 
1199    vg_assert(di && di->admin_tyents);
1200 
1201    if (0) {
1202       VG_(printf)("  ML_(addVar): level %d  %#lx-%#lx  %s :: ",
1203                   level, aMin, aMax, name );
1204       ML_(pp_TyEnt_C_ishly)( di->admin_tyents, typeR );
1205       VG_(printf)("\n  Var=");
1206       ML_(pp_GX)(gexpr);
1207       VG_(printf)("\n");
1208       if (fbGX) {
1209          VG_(printf)("  FrB=");
1210          ML_(pp_GX)( fbGX );
1211          VG_(printf)("\n");
1212       } else {
1213          VG_(printf)("  FrB=none\n");
1214       }
1215       VG_(printf)("\n");
1216    }
1217 
1218    vg_assert(level >= 0);
1219    vg_assert(aMin <= aMax);
1220    vg_assert(name);
1221    vg_assert(gexpr);
1222 
1223    ent = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL, typeR);
1224    vg_assert(ent);
1225    vg_assert(ML_(TyEnt__is_type)(ent));
1226 
1227    /* "Comment_Regarding_Text_Range_Checks" (is referred to elsewhere)
1228       ----------------------------------------------------------------
1229       Ignore any variables whose aMin .. aMax (that is, range of text
1230       addresses for which they actually exist) falls outside the text
1231       segment.  Is this indicative of a bug in the reader?  Maybe.
1232       (LATER): instead of restricting strictly to the .text segment,
1233       be a bit more relaxed, and accept any variable whose text range
1234       falls inside the r-x mapped area.  This is useful because .text
1235       is not always the only instruction-carrying segment: others are:
1236       .init .plt __libc_freeres_fn and .fini.  This implicitly assumes
1237       that those extra sections have the same bias as .text, but that
1238       seems a reasonable assumption to me. */
1239    /* This is assured us by top level steering logic in debuginfo.c,
1240       and it is re-checked at the start of
1241       ML_(read_elf_debug_info). */
1242    vg_assert(di->fsm.have_rx_map && di->fsm.have_rw_map);
1243    if (level > 0 && ML_(find_rx_mapping)(di, aMin, aMax) == NULL) {
1244       if (VG_(clo_verbosity) >= 0) {
1245          VG_(message)(Vg_DebugMsg,
1246             "warning: addVar: in range %#lx .. %#lx outside "
1247             "all rx mapped areas (%s)\n",
1248             aMin, aMax, name
1249          );
1250       }
1251       return;
1252    }
1253 
1254    /* If the type's size is zero (which can mean unknown size), ignore
1255       it.  We will never be able to actually relate a data address to
1256       a data object with zero size, so there's no point in storing
1257       info on it.  On 32-bit platforms, also reject types whose size
1258       is 2^32 bytes or large.  (It's amazing what junk shows up ..) */
1259    mul = ML_(sizeOfType)(di->admin_tyents, typeR);
1260 
1261    badness = NULL;
1262    if (mul.b != True)
1263       badness = "unknown size";
1264    else if (mul.ul == 0)
1265       badness = "zero size   ";
1266    else if (sizeof(void*) == 4 && mul.ul >= (1ULL<<32))
1267       badness = "implausibly large";
1268 
1269    if (badness) {
1270       static Int complaints = 10;
1271       if (VG_(clo_verbosity) >= 2 && complaints > 0) {
1272          VG_(message)(Vg_DebugMsg, "warning: addVar: %s (%s)\n",
1273                                    badness, name );
1274          complaints--;
1275       }
1276       return;
1277    }
1278 
1279    if (!di->varinfo) {
1280       di->varinfo = VG_(newXA)( ML_(dinfo_zalloc),
1281                                 "di.storage.addVar.1",
1282                                 ML_(dinfo_free),
1283                                 sizeof(OSet*) );
1284    }
1285 
1286    vg_assert(level < 256); /* arbitrary; stay sane */
1287    /* Expand the top level array enough to map this level */
1288    while ( VG_(sizeXA)(di->varinfo) <= level ) {
1289       DiAddrRange* nyu;
1290       scope = VG_(OSetGen_Create)( offsetof(DiAddrRange,aMin),
1291                                    ML_(cmp_for_DiAddrRange_range),
1292                                    ML_(dinfo_zalloc), "di.storage.addVar.2",
1293                                    ML_(dinfo_free) );
1294       if (0) VG_(printf)("create: scope = %p, adding at %ld\n",
1295                          scope, VG_(sizeXA)(di->varinfo));
1296       VG_(addToXA)( di->varinfo, &scope );
1297       /* Add a single range covering the entire address space.  At
1298          level 0 we require this doesn't get split.  At levels above 0
1299          we require that any additions to it cause it to get split.
1300          All of these invariants get checked both add_var_to_arange
1301          and after reading is complete, in canonicaliseVarInfo. */
1302       nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
1303       nyu->aMin = (Addr)0;
1304       nyu->aMax = ~(Addr)0;
1305       nyu->vars = VG_(newXA)( ML_(dinfo_zalloc), "di.storage.addVar.3",
1306                               ML_(dinfo_free),
1307                               sizeof(DiVariable) );
1308       VG_(OSetGen_Insert)( scope, nyu );
1309    }
1310 
1311    vg_assert( VG_(sizeXA)(di->varinfo) > level );
1312    scope = *(OSet**)VG_(indexXA)( di->varinfo, level );
1313    vg_assert(scope);
1314 
1315    var.name     = name;
1316    var.typeR    = typeR;
1317    var.gexpr    = gexpr;
1318    var.fbGX     = fbGX;
1319    var.fndn_ix  = fndn_ix;
1320    var.lineNo   = lineNo;
1321 
1322    all = aMin == (Addr)0 && aMax == ~(Addr)0;
1323    vg_assert(level == 0 ? all : !all);
1324 
1325    add_var_to_arange( /*MOD*/scope, aMin, aMax, &var );
1326 }
1327 
1328 
1329 /* This really just checks the constructed data structure, as there is
1330    no canonicalisation to do. */
canonicaliseVarInfo(struct _DebugInfo * di)1331 static void canonicaliseVarInfo ( struct _DebugInfo* di )
1332 {
1333    Word i, nInThisScope;
1334 
1335    if (!di->varinfo)
1336       return;
1337 
1338    for (i = 0; i < VG_(sizeXA)(di->varinfo); i++) {
1339 
1340       DiAddrRange *range, *rangep;
1341       OSet* scope = *(OSet**)VG_(indexXA)(di->varinfo, i);
1342       if (!scope) continue;
1343 
1344       /* Deal with the global-scope case. */
1345       if (i == 0) {
1346          Addr zero = 0;
1347          vg_assert(VG_(OSetGen_Size)( scope ) == 1);
1348          range = VG_(OSetGen_Lookup)( scope, &zero );
1349          vg_assert(range);
1350          vg_assert(range->aMin == (Addr)0);
1351          vg_assert(range->aMax == ~(Addr)0);
1352          continue;
1353       }
1354 
1355       /* All the rest of this is for the local-scope case. */
1356       /* iterate over all entries in 'scope' */
1357       nInThisScope = 0;
1358       rangep = NULL;
1359       VG_(OSetGen_ResetIter)(scope);
1360       while (True) {
1361          range = VG_(OSetGen_Next)(scope);
1362          if (!range) {
1363            /* We just saw the last one.  There must have been at
1364               least one entry in the range. */
1365            vg_assert(rangep);
1366            vg_assert(rangep->aMax == ~(Addr)0);
1367            break;
1368          }
1369 
1370          vg_assert(range->aMin <= range->aMax);
1371          vg_assert(range->vars);
1372 
1373          if (!rangep) {
1374            /* This is the first entry in the range. */
1375            vg_assert(range->aMin == 0);
1376          } else {
1377            vg_assert(rangep->aMax + 1 == range->aMin);
1378          }
1379 
1380          rangep = range;
1381          nInThisScope++;
1382       } /* iterating over ranges in a given scope */
1383 
1384       /* If there's only one entry in this (local) scope, it must
1385          cover the entire address space (obviously), but it must not
1386          contain any vars. */
1387 
1388       vg_assert(nInThisScope > 0);
1389       if (nInThisScope == 1) {
1390          Addr zero = 0;
1391          vg_assert(VG_(OSetGen_Size)( scope ) == 1);
1392          range = VG_(OSetGen_Lookup)( scope, &zero );
1393          vg_assert(range);
1394          vg_assert(range->aMin == (Addr)0);
1395          vg_assert(range->aMax == ~(Addr)0);
1396          vg_assert(range->vars);
1397          vg_assert(VG_(sizeXA)(range->vars) == 0);
1398       }
1399 
1400    } /* iterate over scopes */
1401 }
1402 
1403 
1404 /*------------------------------------------------------------*/
1405 /*--- Canonicalisers                                       ---*/
1406 /*------------------------------------------------------------*/
1407 
1408 /* Sort the symtab by starting address, and emit warnings if any
1409    symbols have overlapping address ranges.  We use that old chestnut,
1410    shellsort.  Mash the table around so as to establish the property
1411    that addresses are in order and the ranges to not overlap.  This
1412    facilitates using binary search to map addresses to symbols when we
1413    come to query the table.
1414 */
compare_DiSym(const void * va,const void * vb)1415 static Int compare_DiSym ( const void* va, const void* vb )
1416 {
1417    const DiSym* a = va;
1418    const DiSym* b = vb;
1419    if (a->avmas.main < b->avmas.main) return -1;
1420    if (a->avmas.main > b->avmas.main) return  1;
1421    return 0;
1422 }
1423 
1424 
1425 /* An address is associated with more than one name.  Which do we
1426    prefer as the "display" name (that we show the user in stack
1427    traces)?  In order:
1428 
1429    - Prefer "PMPI_<foo>" over "MPI_<foo>".
1430 
1431    - Else, prefer a non-empty name over an empty one.
1432 
1433    - Else, prefer a non-whitespace name over an all-whitespace name.
1434 
1435    - Else, prefer the shorter symbol name.  If the symbol contains a
1436      version symbol ('@' on Linux, other platforms may differ), which means it
1437      is versioned, then the length up to the version symbol is used for length
1438      comparison purposes (so "foo@GLIBC_2.4.2" is considered shorter than
1439      "foobar").
1440 
1441    - Else, if two symbols have the same length, prefer a versioned symbol over
1442      a non-versioned symbol.
1443 
1444    - Else, use alphabetical ordering.
1445 
1446    - Otherwise, they must be the same;  use the name with the lower address.
1447 
1448    Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are
1449    aliases in glibc, we choose the 'bcmp' symbol because it's shorter,
1450    so we can misdescribe memcmp() as bcmp()).  This is hard to avoid.
1451    It's mentioned in the FAQ file.
1452 
1453    Returned value is True if a_name is preferred, False if b_name is
1454    preferred.
1455  */
1456 static
preferName(const DebugInfo * di,const HChar * a_name,const HChar * b_name,Addr sym_avma)1457 Bool preferName ( const DebugInfo* di,
1458                   const HChar* a_name, const HChar* b_name,
1459                   Addr sym_avma/*exposition only*/ )
1460 {
1461    Word cmp;
1462    Word vlena, vlenb;		/* length without version */
1463    const HChar *vpa, *vpb;
1464 
1465    Bool preferA = False;
1466    Bool preferB = False;
1467 
1468    vg_assert(a_name);
1469    vg_assert(b_name);
1470    // vg_assert(a_name != b_name);
1471    // ???? now the pointers can be equal but is that
1472    // ???? not the indication of a latent bug ????
1473 
1474    vlena = VG_(strlen)(a_name);
1475    vlenb = VG_(strlen)(b_name);
1476 
1477 #  if defined(VGO_linux) || defined(VGO_solaris) || defined(VGO_dragonfly)
1478 #    define VERSION_CHAR '@'
1479 #  elif defined(VGO_darwin)
1480 #    define VERSION_CHAR '$'
1481 #  else
1482 #    error Unknown OS
1483 #  endif
1484 
1485    vpa = VG_(strchr)(a_name, VERSION_CHAR);
1486    vpb = VG_(strchr)(b_name, VERSION_CHAR);
1487 
1488 #  undef VERSION_CHAR
1489 
1490    if (vpa)
1491       vlena = vpa - a_name;
1492    if (vpb)
1493       vlenb = vpb - b_name;
1494 
1495    /* MPI hack: prefer PMPI_Foo over MPI_Foo */
1496    if (0==VG_(strncmp)(a_name, "MPI_", 4)
1497        && 0==VG_(strncmp)(b_name, "PMPI_", 5)
1498        && 0==VG_(strcmp)(a_name, 1+b_name)) {
1499       preferB = True; goto out;
1500    }
1501    if (0==VG_(strncmp)(b_name, "MPI_", 4)
1502        && 0==VG_(strncmp)(a_name, "PMPI_", 5)
1503        && 0==VG_(strcmp)(b_name, 1+a_name)) {
1504       preferA = True; goto out;
1505    }
1506 
1507    /* Prefer non-empty name. */
1508    if (vlena  &&  !vlenb) {
1509       preferA = True; goto out;
1510    }
1511    if (vlenb  &&  !vlena) {
1512       preferB = True; goto out;
1513    }
1514 
1515    /* Prefer non-whitespace name. */
1516    {
1517       Bool blankA = True;
1518       Bool blankB = True;
1519       const HChar *s;
1520       s = a_name;
1521       while (*s) {
1522          if (!VG_(isspace)(*s++)) {
1523             blankA = False;
1524             break;
1525          }
1526       }
1527       s = b_name;
1528       while (*s) {
1529          if (!VG_(isspace)(*s++)) {
1530             blankB = False;
1531             break;
1532          }
1533       }
1534 
1535       if (!blankA  &&  blankB) {
1536          preferA = True; goto out;
1537       }
1538       if (!blankB  &&  blankA) {
1539          preferB = True; goto out;
1540       }
1541    }
1542 
1543    /* Select the shortest unversioned name */
1544    if (vlena < vlenb) {
1545       preferA = True; goto out;
1546    }
1547    if (vlenb < vlena) {
1548       preferB = True; goto out;
1549    }
1550 
1551    /* Equal lengths; select the versioned name */
1552    if (vpa && !vpb) {
1553       preferA = True; goto out;
1554    }
1555    if (vpb && !vpa) {
1556       preferB = True; goto out;
1557    }
1558 
1559    /* Either both versioned or neither is versioned; select them
1560       alphabetically */
1561    cmp = VG_(strcmp)(a_name, b_name);
1562    if (cmp < 0) {
1563       preferA = True; goto out;
1564    }
1565    if (cmp > 0) {
1566       preferB = True; goto out;
1567    }
1568 
1569    /* If we get here, they are the same name. */
1570 
1571    /* In this case we could choose either (arbitrarily), but might as
1572       well choose the one with the lowest DiSym* address, so as to try
1573       and make the comparison mechanism more stable (a la sorting
1574       parlance).  Also, skip the diagnostic printing in this case. */
1575    return a_name <= b_name  ? True  : False;
1576 
1577    /*NOTREACHED*/
1578    vg_assert(0);
1579   out:
1580    if (preferA && !preferB) {
1581       TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
1582                    sym_avma, a_name, b_name );
1583       return True;
1584    }
1585    if (preferB && !preferA) {
1586       TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
1587                    sym_avma, b_name, a_name );
1588       return False;
1589    }
1590    /*NOTREACHED*/
1591    vg_assert(0);
1592 }
1593 
1594 
1595 /* Add the names in FROM to the names in TO. */
1596 static
add_DiSym_names_to_from(const DebugInfo * di,DiSym * to,const DiSym * from)1597 void add_DiSym_names_to_from ( const DebugInfo* di, DiSym* to,
1598                                const DiSym* from )
1599 {
1600    vg_assert(to->pri_name);
1601    vg_assert(from->pri_name);
1602    /* Figure out how many names there will be in the new combined
1603       secondary vector. */
1604    const HChar** to_sec   = to->sec_names;
1605    const HChar** from_sec = from->sec_names;
1606    Word n_new_sec = 1;
1607    if (from_sec) {
1608       while (*from_sec) {
1609          n_new_sec++;
1610          from_sec++;
1611       }
1612    }
1613    if (to_sec) {
1614       while (*to_sec) {
1615          n_new_sec++;
1616          to_sec++;
1617       }
1618    }
1619    if (0)
1620       TRACE_SYMTAB("merge: -> %ld\n", n_new_sec);
1621    /* Create the new sec and copy stuff into it, putting the new
1622       entries at the end. */
1623    const HChar** new_sec = ML_(dinfo_zalloc)( "di.storage.aDntf.1",
1624                                               (n_new_sec+1) * sizeof(HChar*) );
1625    from_sec = from->sec_names;
1626    to_sec   = to->sec_names;
1627    Word i = 0;
1628    if (to_sec) {
1629       while (*to_sec) {
1630          new_sec[i++] = *to_sec;
1631          to_sec++;
1632       }
1633    }
1634    new_sec[i++] = from->pri_name;
1635    if (from_sec) {
1636       while (*from_sec) {
1637          new_sec[i++] = *from_sec;
1638          from_sec++;
1639       }
1640    }
1641    vg_assert(i == n_new_sec);
1642    vg_assert(new_sec[i] == NULL);
1643    /* If we're replacing an existing secondary vector, free it. */
1644    if (to->sec_names) {
1645       ML_(dinfo_free)(to->sec_names);
1646    }
1647    to->sec_names = new_sec;
1648 }
1649 
1650 
canonicaliseSymtab(struct _DebugInfo * di)1651 static void canonicaliseSymtab ( struct _DebugInfo* di )
1652 {
1653    Word  i, j, n_truncated;
1654    Addr  sta1, sta2, end1, end2, toc1, toc2;
1655    const HChar *pri1, *pri2, **sec1, **sec2;
1656    Bool  ist1, ist2, isf1, isf2, isg1, isg2;
1657 
1658 #  define SWAP(ty,aa,bb) \
1659       do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
1660 
1661    if (di->symtab_used == 0)
1662       return;
1663 
1664    /* Check initial invariants */
1665    for (i = 0; i < di->symtab_used; i++) {
1666       DiSym* sym = &di->symtab[i];
1667       vg_assert(sym->pri_name);
1668       vg_assert(!sym->sec_names);
1669    }
1670 
1671    /* Sort by address. */
1672    VG_(ssort)(di->symtab, di->symtab_used,
1673                           sizeof(*di->symtab), compare_DiSym);
1674 
1675   cleanup_more:
1676 
1677    /* BEGIN Detect and "fix" identical address ranges. */
1678    while (1) {
1679       Word r, w, n_merged;
1680       n_merged = 0;
1681       w = 0;
1682       /* A pass merging entries together in the case where they agree
1683          on .isText -- that is, either: both are .isText or both are
1684          not .isText.  They are merged into a single entry, but both
1685          sets of names are preserved, so we end up knowing all the
1686          names for that particular address range.*/
1687       for (r = 1; r < di->symtab_used; r++) {
1688          vg_assert(w < r);
1689          if (   di->symtab[w].avmas.main == di->symtab[r].avmas.main
1690              && di->symtab[w].size       == di->symtab[r].size
1691              && !!di->symtab[w].isText   == !!di->symtab[r].isText) {
1692             /* merge the two into one */
1693             n_merged++;
1694             /* Add r names to w if r has secondary names
1695                or r and w primary names differ. */
1696             if (di->symtab[r].sec_names
1697                 || (0 != VG_(strcmp)(di->symtab[r].pri_name,
1698                                      di->symtab[w].pri_name))) {
1699                add_DiSym_names_to_from(di, &di->symtab[w], &di->symtab[r]);
1700             }
1701             /* mark w as an IFunc if either w or r are */
1702             di->symtab[w].isIFunc = di->symtab[w].isIFunc || di->symtab[r].isIFunc;
1703             /* likewise for global symbols */
1704             di->symtab[w].isGlobal = di->symtab[w].isGlobal || di->symtab[r].isGlobal;
1705             /* and use ::pri_names to indicate this slot is no longer in use */
1706             di->symtab[r].pri_name = NULL;
1707             if (di->symtab[r].sec_names) {
1708                ML_(dinfo_free)(di->symtab[r].sec_names);
1709                di->symtab[r].sec_names = NULL;
1710             }
1711             /* Completely zap the entry -- paranoia to make it more
1712                likely we'll notice if we inadvertently use it
1713                again. */
1714             VG_(memset)(&di->symtab[r], 0, sizeof(DiSym));
1715          } else {
1716             w = r;
1717          }
1718       }
1719 
1720       /* A second pass merging entries together where one .isText but
1721          the other isn't.  In such cases, just ignore the non-.isText
1722          one (a heuristic hack.) */
1723       for (r = 1; r < di->symtab_used; r++) {
1724          /* Either of the symbols might already have been zapped by
1725             the previous pass, so we first have to check that. */
1726          if (di->symtab[r-1].pri_name == NULL) continue;
1727          if (di->symtab[r-0].pri_name == NULL) continue;
1728          /* ok, they are both still valid.  Identical address ranges? */
1729          if (di->symtab[r-1].avmas.main != di->symtab[r-0].avmas.main) continue;
1730          if (di->symtab[r-1].size       != di->symtab[r-0].size) continue;
1731          /* Identical address ranges.  They must disagree on .isText
1732             since if they agreed, the previous pass would have merged
1733             them. */
1734          if (di->symtab[r-1].isText && di->symtab[r-0].isText) vg_assert(0);
1735          if (!di->symtab[r-1].isText && !di->symtab[r-0].isText) vg_assert(0);
1736          Word to_zap  = di->symtab[r-1].isText ? (r-0) : (r-1);
1737          Word to_keep = di->symtab[r-1].isText ? (r-1) : (r-0);
1738          vg_assert(!di->symtab[to_zap].isText);
1739          vg_assert(di->symtab[to_keep].isText);
1740          /* Add to_zap's names to to_keep if to_zap has secondary names
1741             or to_zap's and to_keep's primary names differ. */
1742          if (di->symtab[to_zap].sec_names
1743              || (0 != VG_(strcmp)(di->symtab[to_zap].pri_name,
1744                                   di->symtab[to_keep].pri_name))) {
1745             add_DiSym_names_to_from(di, &di->symtab[to_keep],
1746                                         &di->symtab[to_zap]);
1747          }
1748          /* Mark to_zap as not-in use in the same way as in the
1749             previous loop. */
1750          di->symtab[to_zap].pri_name = NULL;
1751          if (di->symtab[to_zap].sec_names) {
1752             ML_(dinfo_free)(di->symtab[to_zap].sec_names);
1753             di->symtab[to_zap].sec_names = NULL;
1754          }
1755          VG_(memset)(&di->symtab[to_zap], 0, sizeof(DiSym));
1756          n_merged++;
1757       }
1758 
1759       TRACE_SYMTAB( "canonicaliseSymtab: %ld symbols merged\n", n_merged);
1760       if (n_merged == 0)
1761          break;
1762       /* Now a pass to squeeze out any unused ones */
1763       w = 0;
1764       for (r = 0; r < di->symtab_used; r++) {
1765          vg_assert(w <= r);
1766          if (di->symtab[r].pri_name == NULL)
1767             continue;
1768          if (w < r) {
1769             di->symtab[w] = di->symtab[r];
1770          }
1771          w++;
1772       }
1773       vg_assert(w + n_merged == di->symtab_used);
1774       di->symtab_used = w;
1775    } /* while (1) */
1776    /* END Detect and "fix" identical address ranges. */
1777 
1778    /* BEGIN Detect and "fix" overlapping address ranges. */
1779    n_truncated = 0;
1780 
1781    for (i = 0; i < ((Word)di->symtab_used) -1; i++) {
1782 
1783       vg_assert(di->symtab[i].avmas.main <= di->symtab[i+1].avmas.main);
1784 
1785       /* Check for common (no overlap) case. */
1786       if (di->symtab[i].avmas.main + di->symtab[i].size
1787           <= di->symtab[i+1].avmas.main)
1788          continue;
1789 
1790       /* There's an overlap.  Truncate one or the other. */
1791       if (di->trace_symtab) {
1792          VG_(printf)("overlapping address ranges in symbol table\n\t");
1793          ML_(ppSym)( i, &di->symtab[i] );
1794          VG_(printf)("\t");
1795          ML_(ppSym)( i+1, &di->symtab[i+1] );
1796          VG_(printf)("\n");
1797       }
1798 
1799       /* Truncate one or the other. */
1800       sta1 = di->symtab[i].avmas.main;
1801       end1 = sta1 + di->symtab[i].size - 1;
1802       toc1 = GET_TOCPTR_AVMA(di->symtab[i].avmas);
1803       // aren't we missing local_ep here ????
1804       pri1 = di->symtab[i].pri_name;
1805       sec1 = di->symtab[i].sec_names;
1806       ist1 = di->symtab[i].isText;
1807       isf1 = di->symtab[i].isIFunc;
1808       isg1 = di->symtab[i].isGlobal;
1809 
1810       sta2 = di->symtab[i+1].avmas.main;
1811       end2 = sta2 + di->symtab[i+1].size - 1;
1812       toc2 = GET_TOCPTR_AVMA(di->symtab[i+1].avmas);
1813       // aren't we missing local_ep here ????
1814       pri2 = di->symtab[i+1].pri_name;
1815       sec2 = di->symtab[i+1].sec_names;
1816       ist2 = di->symtab[i+1].isText;
1817       isf2 = di->symtab[i+1].isIFunc;
1818       isg2 = di->symtab[i+1].isGlobal;
1819 
1820       if (sta1 < sta2) {
1821          end1 = sta2 - 1;
1822       } else {
1823          vg_assert(sta1 == sta2);
1824          if (end1 > end2) {
1825             sta1 = end2 + 1;
1826             SWAP(Addr,sta1,sta2); SWAP(Addr,end1,end2); SWAP(Addr,toc1,toc2);
1827             SWAP(const HChar*,pri1,pri2); SWAP(const HChar**,sec1,sec2);
1828             SWAP(Bool,ist1,ist2); SWAP(Bool,isf1,isf2); SWAP(Bool, isg1, isg2);
1829          } else
1830          if (end1 < end2) {
1831             sta2 = end1 + 1;
1832          } else {
1833 	   /* end1 == end2.  Identical addr ranges.  We'll eventually wind
1834               up back at cleanup_more, which will take care of it. */
1835 	 }
1836       }
1837       di->symtab[i].avmas.main = sta1;
1838       di->symtab[i].size       = end1 - sta1 + 1;
1839       SET_TOCPTR_AVMA(di->symtab[i].avmas, toc1);
1840       // missing local_ep ???
1841       di->symtab[i].pri_name  = pri1;
1842       di->symtab[i].sec_names = sec1;
1843       di->symtab[i].isText    = ist1;
1844       di->symtab[i].isIFunc   = isf1;
1845       di->symtab[i].isGlobal  = isg1;
1846 
1847       di->symtab[i+1].avmas.main = sta2;
1848       di->symtab[i+1].size       = end2 - sta2 + 1;
1849       SET_TOCPTR_AVMA(di->symtab[i+1].avmas, toc2);
1850       // missing local_ep ???
1851       di->symtab[i+1].pri_name  = pri2;
1852       di->symtab[i+1].sec_names = sec2;
1853       di->symtab[i+1].isText    = ist2;
1854       di->symtab[i+1].isIFunc   = isf2;
1855       di->symtab[i+1].isGlobal  = isg2;
1856 
1857       vg_assert(sta1 <= sta2);
1858       vg_assert(di->symtab[i].size > 0);
1859       vg_assert(di->symtab[i+1].size > 0);
1860       /* It may be that the i+1 entry now needs to be moved further
1861          along to maintain the address order requirement. */
1862       j = i+1;
1863       while (j < ((Word)di->symtab_used)-1
1864              && di->symtab[j].avmas.main > di->symtab[j+1].avmas.main) {
1865          SWAP(DiSym,di->symtab[j],di->symtab[j+1]);
1866          j++;
1867       }
1868       n_truncated++;
1869    }
1870    /* END Detect and "fix" overlapping address ranges. */
1871 
1872    if (n_truncated > 0) goto cleanup_more;
1873 
1874    /* Ensure relevant postconditions hold. */
1875    for (i = 0; i < ((Word)di->symtab_used)-1; i++) {
1876       /* No zero-sized symbols. */
1877       vg_assert(di->symtab[i].size > 0);
1878       /* In order. */
1879       vg_assert(di->symtab[i].avmas.main < di->symtab[i+1].avmas.main);
1880       /* No overlaps. */
1881       vg_assert(di->symtab[i].avmas.main + di->symtab[i].size - 1
1882                 < di->symtab[i+1].avmas.main);
1883       /* Names are sane(ish) */
1884       vg_assert(di->symtab[i].pri_name);
1885       if (di->symtab[i].sec_names) {
1886          vg_assert(di->symtab[i].sec_names[0]);
1887       }
1888    }
1889 
1890    /* For each symbol that has more than one name, use preferName to
1891       select the primary name.  This is a complete kludge in that
1892       doing it properly requires making a total ordering on the
1893       candidate names, whilst what we have to work with is an ad-hoc
1894       binary relation (preferName) that certainly doesn't have the
1895       relevant transitivity etc properties that are needed to induce a
1896       legitimate total order.  Doesn't matter though if it doesn't
1897       always work right since this is only used to generate names to
1898       show the user. */
1899    for (i = 0; i < ((Word)di->symtab_used)-1; i++) {
1900       DiSym*  sym = &di->symtab[i];
1901       const HChar** sec = sym->sec_names;
1902       if (!sec)
1903          continue;
1904       /* Slow but simple.  Copy all the cands into a temp array,
1905          choose the primary name, and copy them all back again. */
1906       Word n_tmp = 1;
1907       while (*sec) { n_tmp++; sec++; }
1908       j = 0;
1909       const HChar** tmp = ML_(dinfo_zalloc)( "di.storage.cS.1",
1910                                              (n_tmp+1) * sizeof(HChar*) );
1911       tmp[j++] = sym->pri_name;
1912       sec = sym->sec_names;
1913       while (*sec) { tmp[j++] = *sec; sec++; }
1914       vg_assert(j == n_tmp);
1915       vg_assert(tmp[n_tmp] == NULL); /* because of zalloc */
1916       /* Choose the most favoured. */
1917       Word best = 0;
1918       for (j = 1; j < n_tmp; j++) {
1919          if (preferName(di, tmp[best], tmp[j], di->symtab[i].avmas.main)) {
1920             /* best is unchanged */
1921          } else {
1922             best = j;
1923          }
1924       }
1925       vg_assert(best >= 0 && best < n_tmp);
1926       /* Copy back */
1927       sym->pri_name = tmp[best];
1928       const HChar** cursor = sym->sec_names;
1929       for (j = 0; j < n_tmp; j++) {
1930          if (j == best)
1931             continue;
1932          *cursor = tmp[j];
1933          cursor++;
1934       }
1935       vg_assert(*cursor == NULL);
1936       ML_(dinfo_free)( tmp );
1937    }
1938 
1939 #  undef SWAP
1940 }
1941 
1942 
1943 static DiLoc* sorting_loctab = NULL;
compare_DiLoc_via_ix(const void * va,const void * vb)1944 static Int compare_DiLoc_via_ix ( const void* va, const void* vb )
1945 {
1946    const DiLoc* a = &sorting_loctab[*(const UInt*)va];
1947    const DiLoc* b = &sorting_loctab[*(const UInt*)vb];
1948    if (a->addr < b->addr) return -1;
1949    if (a->addr > b->addr) return  1;
1950    return 0;
1951 }
sort_loctab_and_loctab_fndn_ix(struct _DebugInfo * di)1952 static void sort_loctab_and_loctab_fndn_ix (struct _DebugInfo* di )
1953 {
1954    /* We have to sort the array loctab by addr
1955       together with its "parallel" array loctab_fndn_ix.
1956       We first build sort_ix : an array of indexes in loctab,
1957       that we sort by loctab address. Then we can reorder both
1958       arrays according to sort_ix. */
1959    UInt *sort_ix = ML_(dinfo_zalloc)("di.storage.six",
1960                                      di->loctab_used*sizeof(UInt));
1961    Word i, j, k;
1962 
1963    for (i = 0; i < di->loctab_used; i++) sort_ix[i] = i;
1964    sorting_loctab = di->loctab;
1965    VG_(ssort)(sort_ix, di->loctab_used,
1966               sizeof(*sort_ix), compare_DiLoc_via_ix);
1967    sorting_loctab = NULL;
1968 
1969    // Permute in place, using the sort_ix.
1970    for (i=0; i < di->loctab_used; i++) {
1971       DiLoc tmp_diloc;
1972       UInt  tmp_fndn_ix;
1973 
1974       if (i == sort_ix[i])
1975          continue; // i already at the good place
1976 
1977       tmp_diloc = di->loctab[i];
1978       tmp_fndn_ix = ML_(fndn_ix)(di, i);
1979       j = i;
1980       for (;;) {
1981          k = sort_ix[j];
1982          sort_ix[j] = j;
1983          if (k == i)
1984             break;
1985          di->loctab[j] = di->loctab[k];
1986          set_fndn_ix (di, j, ML_(fndn_ix)(di, k));
1987          j = k;
1988       }
1989       di->loctab[j] = tmp_diloc;
1990       set_fndn_ix (di, j, tmp_fndn_ix);
1991    }
1992    ML_(dinfo_free)(sort_ix);
1993 }
1994 
1995 /* Sort the location table by starting address.  Mash the table around
1996    so as to establish the property that addresses are in order and the
1997    ranges do not overlap.  This facilitates using binary search to map
1998    addresses to locations when we come to query the table.
1999 */
canonicaliseLoctab(struct _DebugInfo * di)2000 static void canonicaliseLoctab ( struct _DebugInfo* di )
2001 {
2002    Word i, j;
2003 
2004    if (di->loctab_used == 0)
2005       return;
2006 
2007    /* sort loctab and loctab_fndn_ix by addr. */
2008    sort_loctab_and_loctab_fndn_ix (di);
2009 
2010    for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
2011       vg_assert(di->loctab[i].size < 10000);
2012       /* If two adjacent entries overlap, truncate the first. */
2013       if (di->loctab[i].addr + di->loctab[i].size > di->loctab[i+1].addr) {
2014          /* Do this in signed int32 because the actual .size fields
2015             are only 12 bits. */
2016          Int new_size = di->loctab[i+1].addr - di->loctab[i].addr;
2017          TRACE_LOCTAB_CANON ("Truncating",
2018                              &(di->loctab[i]), &(di->loctab[i+1]));
2019          if (new_size < 0) {
2020             di->loctab[i].size = 0;
2021          } else
2022          if (new_size > MAX_LOC_SIZE) {
2023            di->loctab[i].size = MAX_LOC_SIZE;
2024          } else {
2025            di->loctab[i].size = (UShort)new_size;
2026          }
2027       }
2028    }
2029 
2030    /* Zap any zero-sized entries resulting from the truncation
2031       process. */
2032    j = 0;
2033    for (i = 0; i < (Word)di->loctab_used; i++) {
2034       if (di->loctab[i].size > 0) {
2035          if (j != i) {
2036             di->loctab[j] = di->loctab[i];
2037             set_fndn_ix(di, j, ML_(fndn_ix)(di, i));
2038          }
2039          j++;
2040       }
2041    }
2042    di->loctab_used = j;
2043 
2044    /* Ensure relevant postconditions hold. */
2045    for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
2046       if (0)
2047          VG_(printf)("%ld  0x%p  lno:%d sz:%d fndn_ix:%u  i+1 0x%p\n",
2048                      i,
2049                      (void*)di->loctab[i].addr,
2050                      di->loctab[i].lineno,
2051                      di->loctab[i].size,
2052                      ML_(fndn_ix)(di, i),
2053                      (void*)di->loctab[i+1].addr);
2054 
2055       /* No zero-sized symbols. */
2056       vg_assert(di->loctab[i].size > 0);
2057       /* In order. */
2058       vg_assert(di->loctab[i].addr < di->loctab[i+1].addr);
2059       /* No overlaps. */
2060       vg_assert(di->loctab[i].addr + di->loctab[i].size - 1
2061                 < di->loctab[i+1].addr);
2062    }
2063 
2064    /* Free up unused space at the end of the table. */
2065    shrinkLocTab(di);
2066 }
2067 
2068 /* Sort the inlined call table by starting address.  Mash the table around
2069    so as to establish the property that addresses are in order.
2070    This facilitates using binary search to map addresses to locations when
2071    we come to query the table.
2072    Note : ranges can overlap, multiple ranges can start at an address,
2073    multiple ranges can end at an address.
2074 */
compare_DiInlLoc(const void * va,const void * vb)2075 static Int compare_DiInlLoc ( const void* va, const void* vb )
2076 {
2077    const DiInlLoc* a = va;
2078    const DiInlLoc* b = vb;
2079    if (a->addr_lo < b->addr_lo) return -1;
2080    if (a->addr_lo > b->addr_lo) return  1;
2081    return 0;
2082 }
2083 
canonicaliseInltab(struct _DebugInfo * di)2084 static void canonicaliseInltab ( struct _DebugInfo* di )
2085 {
2086    Word i;
2087 
2088    if (di->inltab_used == 0)
2089       return;
2090 
2091    /* Sort by start address. */
2092    VG_(ssort)(di->inltab, di->inltab_used,
2093                           sizeof(*di->inltab), compare_DiInlLoc);
2094 
2095    /* Ensure relevant postconditions hold. */
2096    for (i = 0; i < ((Word)di->inltab_used)-1; i++) {
2097       /* No zero-sized inlined call. */
2098       vg_assert(di->inltab[i].addr_lo < di->inltab[i].addr_hi);
2099       /* In order, but we can have duplicates and overlapping ranges. */
2100       vg_assert(di->inltab[i].addr_lo <= di->inltab[i+1].addr_lo);
2101    }
2102 
2103    /* Free up unused space at the end of the table. */
2104    shrinkInlTab(di);
2105 }
2106 
2107 
2108 /* Sort the call-frame-info cfsi_rd by starting address.  Mash the table
2109    around so as to establish the property that addresses are in order
2110    and the ranges do not overlap.  This facilitates using binary
2111    search to map addresses to locations when we come to query the
2112    table.
2113 
2114    Also, set cfisi_minaddr and cfisi_maxaddr to be the min and max of
2115    any of the address ranges contained in cfisi[0 .. cfisi_used-1], so
2116    as to facilitate rapidly skipping this SegInfo when looking for an
2117    address which falls outside that range.
2118 */
compare_DiCfSI(const void * va,const void * vb)2119 static Int compare_DiCfSI ( const void* va, const void* vb )
2120 {
2121    const DiCfSI* a = va;
2122    const DiCfSI* b = vb;
2123    if (a->base < b->base) return -1;
2124    if (a->base > b->base) return  1;
2125    return 0;
2126 }
2127 
get_cfsi_rd_stats(const DebugInfo * di,UWord * n_mergeables,UWord * n_holes)2128 static void get_cfsi_rd_stats ( const DebugInfo* di,
2129                                 UWord *n_mergeables, UWord *n_holes )
2130 {
2131    Word i;
2132 
2133    *n_mergeables = 0;
2134    *n_holes = 0;
2135 
2136    vg_assert (di->cfsi_used == 0 || di->cfsi_rd);
2137    for (i = 1; i < (Word)di->cfsi_used; i++) {
2138       Addr here_min = di->cfsi_rd[i].base;
2139       Addr prev_max = di->cfsi_rd[i-1].base + di->cfsi_rd[i-1].len - 1;
2140       Addr sep = here_min - prev_max;
2141       if (sep > 1)
2142          (*n_holes)++;
2143       if (sep == 1 && di->cfsi_rd[i-1].cfsi_m_ix == di->cfsi_rd[i].cfsi_m_ix)
2144          (*n_mergeables)++;
2145    }
2146 }
2147 
ML_(canonicaliseCFI)2148 void ML_(canonicaliseCFI) ( struct _DebugInfo* di )
2149 {
2150    Word  i, j;
2151    const Addr minAvma = 0;
2152    const Addr maxAvma = ~minAvma;
2153 
2154    /* Note: take care in here.  di->cfsi can be NULL, in which
2155       case _used and _size fields will be zero. */
2156    if (di->cfsi_rd == NULL) {
2157       vg_assert(di->cfsi_used == 0);
2158       vg_assert(di->cfsi_size == 0);
2159       vg_assert(di->cfsi_m_pool == NULL);
2160    } else {
2161       vg_assert(di->cfsi_size != 0);
2162       vg_assert(di->cfsi_m_pool != NULL);
2163    }
2164 
2165    /* Set cfsi_minavma and cfsi_maxavma to summarise the entire
2166       address range contained in cfsi_rd[0 .. cfsi_used-1]. */
2167    di->cfsi_minavma = maxAvma;
2168    di->cfsi_maxavma = minAvma;
2169    for (i = 0; i < (Word)di->cfsi_used; i++) {
2170       Addr here_min = di->cfsi_rd[i].base;
2171       Addr here_max = di->cfsi_rd[i].base + di->cfsi_rd[i].len - 1;
2172       if (here_min < di->cfsi_minavma)
2173          di->cfsi_minavma = here_min;
2174       if (here_max > di->cfsi_maxavma)
2175          di->cfsi_maxavma = here_max;
2176    }
2177 
2178    if (di->trace_cfi)
2179       VG_(printf)("canonicaliseCfiSI: %lu entries, %#lx .. %#lx\n",
2180                   di->cfsi_used,
2181                   di->cfsi_minavma, di->cfsi_maxavma);
2182 
2183    /* Sort the cfsi_rd array by base address. */
2184    VG_(ssort)(di->cfsi_rd, di->cfsi_used, sizeof(*di->cfsi_rd), compare_DiCfSI);
2185 
2186    /* If two adjacent entries overlap, truncate the first. */
2187    for (i = 0; i < (Word)di->cfsi_used-1; i++) {
2188       if (di->cfsi_rd[i].base + di->cfsi_rd[i].len > di->cfsi_rd[i+1].base) {
2189          Word new_len = di->cfsi_rd[i+1].base - di->cfsi_rd[i].base;
2190          /* how could it be otherwise?  The entries are sorted by the
2191             .base field. */
2192          vg_assert(new_len >= 0);
2193 	 vg_assert(new_len <= di->cfsi_rd[i].len);
2194          di->cfsi_rd[i].len = new_len;
2195       }
2196    }
2197 
2198    /* Zap any zero-sized entries resulting from the truncation
2199       process. */
2200    j = 0;
2201    for (i = 0; i < (Word)di->cfsi_used; i++) {
2202       if (di->cfsi_rd[i].len > 0) {
2203          if (j != i)
2204             di->cfsi_rd[j] = di->cfsi_rd[i];
2205          j++;
2206       }
2207    }
2208    /* VG_(printf)("XXXXXXXXXXXXX %d %d\n", di->cfsi_used, j); */
2209    di->cfsi_used = j;
2210 
2211    /* Ensure relevant postconditions hold. */
2212    for (i = 0; i < (Word)di->cfsi_used; i++) {
2213       /* No zero-length ranges. */
2214       vg_assert(di->cfsi_rd[i].len > 0);
2215       /* Makes sense w.r.t. summary address range */
2216       vg_assert(di->cfsi_rd[i].base >= di->cfsi_minavma);
2217       vg_assert(di->cfsi_rd[i].base + di->cfsi_rd[i].len - 1
2218                 <= di->cfsi_maxavma);
2219 
2220       if (i < di->cfsi_used - 1) {
2221          /*
2222          if (!(di->cfsi[i].base < di->cfsi[i+1].base)) {
2223             VG_(printf)("\nOOO cfsis:\n");
2224             ML_(ppCfiSI)(&di->cfsi[i]);
2225             ML_(ppCfiSI)(&di->cfsi[i+1]);
2226          }
2227          */
2228          /* In order. */
2229          vg_assert(di->cfsi_rd[i].base < di->cfsi_rd[i+1].base);
2230          /* No overlaps. */
2231          vg_assert(di->cfsi_rd[i].base + di->cfsi_rd[i].len - 1
2232                    < di->cfsi_rd[i+1].base);
2233       }
2234    }
2235 
2236    if (VG_(clo_stats) && VG_(clo_verbosity) >= 3) {
2237       UWord n_mergeables, n_holes;
2238       get_cfsi_rd_stats (di, &n_mergeables, &n_holes);
2239       VG_(dmsg)("CFSI total %lu mergeables %lu holes %lu uniq cfsi_m %u\n",
2240                 di->cfsi_used, n_mergeables, n_holes,
2241                 di->cfsi_m_pool ? VG_(sizeDedupPA) (di->cfsi_m_pool) : 0);
2242    }
2243 }
2244 
ML_(finish_CFSI_arrays)2245 void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di )
2246 {
2247    UWord n_mergeables, n_holes;
2248    UWord new_used;
2249    UWord i;
2250    UWord pos;
2251    UWord f_mergeables, f_holes;
2252    UInt sz_cfsi_m_pool;
2253 
2254    get_cfsi_rd_stats (di, &n_mergeables, &n_holes);
2255 
2256    if (di->cfsi_used == 0) {
2257       vg_assert (di->cfsi_rd == NULL);
2258       vg_assert (di->cfsi_m_pool == NULL);
2259       vg_assert (n_mergeables == 0);
2260       vg_assert (n_holes == 0);
2261       return;
2262    }
2263 
2264    vg_assert (di->cfsi_used > n_mergeables);
2265    new_used = di->cfsi_used - n_mergeables + n_holes;
2266 
2267    sz_cfsi_m_pool = VG_(sizeDedupPA)(di->cfsi_m_pool);
2268    vg_assert (sz_cfsi_m_pool > 0);
2269    if (sz_cfsi_m_pool <= 255)
2270       di->sizeof_cfsi_m_ix = 1;
2271    else if (sz_cfsi_m_pool <= 65535)
2272       di->sizeof_cfsi_m_ix = 2;
2273    else
2274       di->sizeof_cfsi_m_ix = 4;
2275 
2276    di->cfsi_base = ML_(dinfo_zalloc)( "di.storage.finCfSI.1",
2277                                        new_used * sizeof(Addr) );
2278    di->cfsi_m_ix = ML_(dinfo_zalloc)( "di.storage.finCfSI.2",
2279                                       new_used * sizeof(UChar)*di->sizeof_cfsi_m_ix);
2280 
2281    pos = 0;
2282    f_mergeables = 0;
2283    f_holes = 0;
2284    for (i = 0; i < (Word)di->cfsi_used; i++) {
2285       if (i > 0) {
2286          Addr here_min = di->cfsi_rd[i].base;
2287          Addr prev_max = di->cfsi_rd[i-1].base + di->cfsi_rd[i-1].len - 1;
2288          SizeT sep = here_min - prev_max;
2289 
2290          // Skip a mergeable entry.
2291          if (sep == 1) {
2292             if (di->cfsi_rd[i-1].cfsi_m_ix == di->cfsi_rd[i].cfsi_m_ix) {
2293                f_mergeables++;
2294                continue;
2295             }
2296          }
2297          // Insert a hole if needed.
2298          if (sep > 1) {
2299             f_holes++;
2300             di->cfsi_base[pos] = prev_max + 1;
2301             switch (di->sizeof_cfsi_m_ix) {
2302                case 1: ((UChar*) di->cfsi_m_ix)[pos] = 0; break;
2303                case 2: ((UShort*)di->cfsi_m_ix)[pos] = 0; break;
2304                case 4: ((UInt*)  di->cfsi_m_ix)[pos] = 0; break;
2305                default: vg_assert(0);
2306             }
2307             pos++;
2308          }
2309       }
2310 
2311       // Insert the cfsi entry i.
2312       di->cfsi_base[pos] = di->cfsi_rd[i].base;
2313       switch (di->sizeof_cfsi_m_ix) {
2314          case 1: ((UChar*) di->cfsi_m_ix)[pos] = di->cfsi_rd[i].cfsi_m_ix; break;
2315          case 2: ((UShort*)di->cfsi_m_ix)[pos] = di->cfsi_rd[i].cfsi_m_ix; break;
2316          case 4: ((UInt*)  di->cfsi_m_ix)[pos] = di->cfsi_rd[i].cfsi_m_ix; break;
2317          default: vg_assert(0);
2318       }
2319       pos++;
2320    }
2321 
2322    vg_assert (f_mergeables == n_mergeables);
2323    vg_assert (f_holes == n_holes);
2324    vg_assert (pos == new_used);
2325 
2326    di->cfsi_used = new_used;
2327    di->cfsi_size = new_used;
2328    ML_(dinfo_free) (di->cfsi_rd);
2329    di->cfsi_rd = NULL;
2330 }
2331 
2332 
2333 /* Canonicalise the tables held by 'di', in preparation for use.  Call
2334    this after finishing adding entries to these tables. */
ML_(canonicaliseTables)2335 void ML_(canonicaliseTables) ( struct _DebugInfo* di )
2336 {
2337    canonicaliseSymtab ( di );
2338    canonicaliseLoctab ( di );
2339    canonicaliseInltab ( di );
2340    ML_(canonicaliseCFI) ( di );
2341    if (di->cfsi_m_pool)
2342       VG_(freezeDedupPA) (di->cfsi_m_pool, ML_(dinfo_shrink_block));
2343    canonicaliseVarInfo ( di );
2344    if (di->strpool)
2345       VG_(freezeDedupPA) (di->strpool, ML_(dinfo_shrink_block));
2346    if (di->fndnpool)
2347       VG_(freezeDedupPA) (di->fndnpool, ML_(dinfo_shrink_block));
2348 }
2349 
2350 
2351 /*------------------------------------------------------------*/
2352 /*--- Searching the tables                                 ---*/
2353 /*------------------------------------------------------------*/
2354 
2355 /* Find a symbol-table index containing the specified pointer, or -1
2356    if not found.  Binary search.  */
2357 
ML_(search_one_symtab)2358 Word ML_(search_one_symtab) ( const DebugInfo* di, Addr ptr,
2359                               Bool findText )
2360 {
2361    Addr a_mid_lo, a_mid_hi;
2362    Word mid,
2363         lo = 0,
2364         hi = di->symtab_used-1;
2365    while (True) {
2366       /* current unsearched space is from lo to hi, inclusive. */
2367       if (lo > hi) return -1; /* not found */
2368       mid      = (lo + hi) / 2;
2369       a_mid_lo = di->symtab[mid].avmas.main;
2370       a_mid_hi = ((Addr)di->symtab[mid].avmas.main) + di->symtab[mid].size - 1;
2371 
2372       if (ptr < a_mid_lo) { hi = mid-1; continue; }
2373       if (ptr > a_mid_hi) { lo = mid+1; continue; }
2374       vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
2375       /* Found a symbol with the correct address range.  But is it
2376          of the right kind (text vs data) ? */
2377       if (  findText   &&   di->symtab[mid].isText  ) return mid;
2378       if ( (!findText) && (!di->symtab[mid].isText) ) return mid;
2379       return -1;
2380    }
2381 }
2382 
2383 
2384 /* Find a location-table index containing the specified pointer, or -1
2385    if not found.  Binary search.  */
2386 
ML_(search_one_loctab)2387 Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr )
2388 {
2389    Addr a_mid_lo, a_mid_hi;
2390    Word mid,
2391         lo = 0,
2392         hi = di->loctab_used-1;
2393    while (True) {
2394       /* current unsearched space is from lo to hi, inclusive. */
2395       if (lo > hi) return -1; /* not found */
2396       mid      = (lo + hi) / 2;
2397       a_mid_lo = di->loctab[mid].addr;
2398       a_mid_hi = ((Addr)di->loctab[mid].addr) + di->loctab[mid].size - 1;
2399 
2400       if (ptr < a_mid_lo) { hi = mid-1; continue; }
2401       if (ptr > a_mid_hi) { lo = mid+1; continue; }
2402       vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
2403       return mid;
2404    }
2405 }
2406 
2407 
2408 /* Find a CFI-table index containing the specified pointer, or -1
2409    if not found.  Binary search.  */
2410 
ML_(search_one_cfitab)2411 Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr )
2412 {
2413    Word mid,
2414         lo = 0,
2415         hi = di->cfsi_used-1;
2416 
2417    while (lo <= hi) {
2418       /* Invariants : hi == cfsi_used-1 || ptr < cfsi_base[hi+1]
2419                       lo == 0           || ptr > cfsi_base[lo-1]
2420          (the first part of the invariants is similar to considering
2421          that cfsi_base[-1] is 0 and cfsi_base[cfsi_used] is ~0) */
2422       mid      = (lo + hi) / 2;
2423       if (ptr < di->cfsi_base[mid]) { hi = mid-1; continue; }
2424       if (ptr > di->cfsi_base[mid]) { lo = mid+1; continue; }
2425       lo = mid+1; break;
2426    }
2427 
2428 #if 0
2429    for (mid = 0; mid <= di->cfsi_used-1; mid++)
2430       if (ptr < di->cfsi_base[mid])
2431          break;
2432    vg_assert (lo - 1 == mid - 1);
2433 #endif
2434    return lo - 1;
2435 }
2436 
2437 
2438 /* Find a FPO-table index containing the specified pointer, or -1
2439    if not found.  Binary search.  */
2440 
ML_(search_one_fpotab)2441 Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr )
2442 {
2443    Addr const addr = ptr - di->fpo_base_avma;
2444    Addr a_mid_lo, a_mid_hi;
2445    Word mid, size,
2446         lo = 0,
2447         hi = di->fpo_size-1;
2448    while (True) {
2449       /* current unsearched space is from lo to hi, inclusive. */
2450       if (lo > hi) return -1; /* not found */
2451       mid      = (lo + hi) / 2;
2452       a_mid_lo = di->fpo[mid].ulOffStart;
2453       size     = di->fpo[mid].cbProcSize;
2454       a_mid_hi = a_mid_lo + size - 1;
2455       vg_assert(a_mid_hi >= a_mid_lo);
2456       if (addr < a_mid_lo) { hi = mid-1; continue; }
2457       if (addr > a_mid_hi) { lo = mid+1; continue; }
2458       vg_assert(addr >= a_mid_lo && addr <= a_mid_hi);
2459       return mid;
2460    }
2461 }
2462 
2463 /*--------------------------------------------------------------------*/
2464 /*--- end                                                          ---*/
2465 /*--------------------------------------------------------------------*/
2466