1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- Segment name management                 aspacemgr-segnames.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2015-2017  Florian Krohm
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 /* Segment names are stored in a string table.
32 
33    The string table is organised into slots of varying length. Slots are
34    adjacent and there are no holes between slots.
35    A slot consists of two parts:
36 
37    (1) a fixed size overhead of length 4 bytes
38    (2) a variable size payload of up to 65535 bytes
39 
40    The segment name is stored in the payload area. Therefore:
41    a segment name cannot be longer than 65535 bytes including the '\0'
42    terminator. This looks like a reasonable limitation.
43 
44    Overall slot layout:
45 
46        |          4 bytes            |    max 65535 bytes      |
47        +-----------------------------+-------------------------+
48        |          overhead           |         payload         |
49        +-----------------------------+-------------------------+
50        ^                             ^
51        |                             |
52       -4                             +----- seg->fnIdx
53 
54    Each slot is uniquely identified by an index which points to the first
55    byte of the payload area. It is this value that is stored in seg->fnIdx.
56    Note, that this value is at least 4.
57 
58    A slot either holds a string or it is free. The status of a slot is
59    identified by the leftmost bit in the overhead field, the so called F-bit.
60    F-bit == 1 means that slot is free; otherwise it is occupied and holds a
61    string.
62 
63    Slot containing a string (segment name):
64 
65   bits | 1 |      15      |    16    |
66        +---+--------------+----------+-------------------------+
67        | 0 |   refcount   | slotsize | the string including \0 |
68        +---+--------------+----------+-------------------------+
69        ^                  ^          ^
70        |                  |          |
71       -4                 -2          +----- seg->fnIdx
72 
73    Segment names are reference counted. 15 bits are available which allows
74    for up to 32767 references. If the string is referenced more than 32767
75    times, the reference count will be frozen and the slot can never
76    become free. I'm not unduly concerned.
77    Two bytes are reserved to hold the size of the slot. Well, it's actually
78    the size of the payload aread (i.e. the size of the slot minus the
79    overhead). Ah well -- the name sticks.
80    With two bytes to store the size, the payload area can be at most 65535
81    bytes large.
82 
83    A free slot looks like this:
84 
85   bits | 1 |           31            |    16    |
86        +---+-------------------------+----------+--------------+
87        | 1 | index of next free slot | slotsize | .. unused .. |
88        +---+-------------------------+----------+--------------+
89        ^                             ^
90        |                             |
91       -4                             +----- seg->fnIdx
92 
93    Free slots are chained together in a singly linked list. An index of
94    zero indicates the end of the chain. Note that zero cannot conflict
95    with an index into the string table as the minimum index is at least
96    four (see above).
97 
98    The typical way to traverse the segment names is:
99 
100    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
101       if (is_freeslot(ix))
102          do this
103       else
104          do that
105    }
106 
107    Important detail: there is a sentinel at the end of the list, namely a
108    slot with a zero-sized payload area.
109 
110    Whenever a new segment name needs to be stashed away, the list of free
111    slots is traversed and the first slot which is large enough is being taken
112    (first fit). There will be no splitting of slots, as that complicates
113    matters and without slot coalescing would lead to memory fragmentation.
114    So we leave it as is until a use case comes up that needs something better.
115 */
116 
117 #include "pub_core_basics.h"     // types
118 #include "priv_aspacemgr.h"
119 
120 // A few constants.
121 enum {
122    refcount_size = sizeof(UShort),
123    slotsize_size = sizeof(UShort),
124    overhead = refcount_size + slotsize_size,
125    max_refcount  = 0x7fff,      // 2 bytes - F-bit
126    max_slotsize  = 0xffff,      // 2 bytes
127    max_slotindex = 0x7fffffff,  // 4 bytes - F-bit
128    fbit_mask_value = 0x80,
129    end_of_chain = 0
130 };
131 
132 static const UInt fbit_mask = fbit_mask_value;
133 
134 /* The old segname implementation allowed for 1000 names on Android and
135    6000 names on other platforms. Each name was allowed to be 1000 characters
136    long. That was very wasteful. */
137 #define VG_TABLE_SIZE 1000000
138 
139 /* String table for segment names */
140 
141 static HChar segnames[VG_TABLE_SIZE];  /* her majesty, the string table */
142 static SizeT segnames_used = 0;        /* number of bytes used */
143 static UInt  num_segnames = 0;         /* number of names in string table */
144 static UInt  num_slots = 0;            /* number of slots in string table */
145 static UInt  freeslot_chain = end_of_chain;
146 
147 static Bool
is_freeslot(UInt ix)148 is_freeslot(UInt ix)
149 {
150    aspacem_assert(ix >= overhead && ix <= segnames_used);
151    return (segnames[ix - 4] & fbit_mask) != 0;
152 }
153 
154 static void
put_slotindex(UInt ix,UInt slotindex)155 put_slotindex(UInt ix, UInt slotindex)
156 {
157    aspacem_assert(ix >= overhead && ix <= segnames_used);
158    if (slotindex != 0)
159       aspacem_assert(slotindex >= overhead && slotindex <= segnames_used);
160 
161    slotindex |= fbit_mask << 24;
162    segnames[ix - 1] = slotindex & 0xFF;   slotindex >>= 8;
163    segnames[ix - 2] = slotindex & 0xFF;   slotindex >>= 8;
164    segnames[ix - 3] = slotindex & 0xFF;   slotindex >>= 8;
165    segnames[ix - 4] = slotindex & 0xFF;
166 }
167 
168 static UInt
get_slotindex(UInt ix)169 get_slotindex(UInt ix)
170 {
171    aspacem_assert(ix >= overhead && ix <= segnames_used);
172    aspacem_assert(is_freeslot(ix));
173 
174    // Avoid unexpected sign extension
175    const UChar *unames = (const UChar *)segnames;
176 
177    UInt slotindex = 0;
178    slotindex |= unames[ix - 4];   slotindex <<= 8;
179    slotindex |= unames[ix - 3];   slotindex <<= 8;
180    slotindex |= unames[ix - 2];   slotindex <<= 8;
181    slotindex |= unames[ix - 1];
182 
183    return slotindex & max_slotindex;   // removes the F-bit
184 }
185 
186 static void
put_slotsize(UInt ix,UInt size)187 put_slotsize(UInt ix, UInt size)
188 {
189    aspacem_assert(ix >= overhead && ix <= segnames_used);
190    aspacem_assert(size <= max_slotsize);
191    segnames[ix - 1] = size & 0xff;
192    segnames[ix - 2] = size >> 8;
193 }
194 
195 static UInt
get_slotsize(UInt ix)196 get_slotsize(UInt ix)
197 {
198    aspacem_assert(ix >= overhead && ix <= segnames_used);
199 
200    // Avoid unexpected sign extension
201    const UChar *unames = (const UChar *)segnames;
202    if (is_freeslot(ix))
203       return (unames[ix] << 8) | unames[ix+1];
204    else
205       return (unames[ix - 2] << 8) | unames[ix - 1];
206 }
207 
208 static void
put_refcount(UInt ix,UInt rc)209 put_refcount(UInt ix, UInt rc)
210 {
211    aspacem_assert(ix >= overhead && ix <= segnames_used);
212    aspacem_assert(rc <= max_refcount);
213    // rc <= max_refcount ensures that the F-bit is zero
214    segnames[ix - 3] = rc & 0xff;
215    segnames[ix - 4] = rc >> 8;
216 }
217 
218 static UInt
get_refcount(UInt ix)219 get_refcount(UInt ix)
220 {
221    aspacem_assert(ix >= overhead && ix <= segnames_used);
222    // must not be a free slot
223    aspacem_assert(! is_freeslot(ix));
224 
225    // Avoid unexpected sign extension
226    const UChar *unames = (const UChar *)segnames;
227    return (unames[ix - 4] << 8) | unames[ix - 3];
228 }
229 
230 static void
inc_refcount(UInt ix)231 inc_refcount(UInt ix)
232 {
233    aspacem_assert(ix >= overhead && ix <= segnames_used);
234    UInt rc = get_refcount(ix);
235    if (rc != max_refcount)
236       put_refcount(ix, rc + 1);
237 }
238 
239 static void
dec_refcount(UInt ix)240 dec_refcount(UInt ix)
241 {
242    aspacem_assert(ix >= overhead && ix <= segnames_used);
243    UInt rc = get_refcount(ix);
244    aspacem_assert(rc > 0);
245    if (rc != max_refcount) {
246       --rc;
247       if (rc != 0) {
248          put_refcount(ix, rc);
249       } else {
250          UInt size = get_slotsize(ix);
251          /* Chain this slot in the freelist */
252          put_slotindex(ix, freeslot_chain);
253          put_slotsize(ix + slotsize_size, size);
254          freeslot_chain = ix;
255          --num_segnames;
256          if (0) VG_(am_show_nsegments)(0, "AFTER DECREASE rc -> 0");
257       }
258    }
259 }
260 
261 static void
put_sentinel(UInt ix)262 put_sentinel(UInt ix)
263 {
264    aspacem_assert(ix >= overhead && ix <= segnames_used);
265 
266    put_refcount(ix, 0);
267    put_slotsize(ix, 0);
268 }
269 
270 
271 /* Searches the string table to find an index for the given name.
272    If none is found, an index is allocated and the name stored.
273    If running ouf of memory, return -1. */
274 Int
ML_(am_allocate_segname)275 ML_(am_allocate_segname)(const HChar *name)
276 {
277    UInt len, ix, size, next_freeslot;
278 
279    aspacem_assert(name);
280 
281    if (0) VG_(debugLog)(0, "aspacem", "allocate_segname %s\n", name);
282 
283    len = VG_(strlen)(name);
284 
285    /* First see if we already have the name. */
286    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
287       if (is_freeslot(ix)) continue;
288       if (VG_(strcmp)(name, segnames + ix) == 0) {
289          inc_refcount(ix);
290          return ix;
291       }
292    }
293 
294    /* Is there a free slot in the string table from a previously "freed"
295       segment name ? */
296    Int prev;
297    for (prev = -1, ix = freeslot_chain; ix != end_of_chain;
298         prev = ix, ix = next_freeslot) {
299       next_freeslot = get_slotindex(ix);  // next in chain
300       size = get_slotsize(ix);
301 
302       if (size >= len + 1) {
303          /* Note, if the size of the slot is a lot larger than the length
304             of the string we're about to store in it, we could split the
305             slot into two. But that complicates matters and as we're not
306             doing any coalescing of adjacent free slots this could lead to
307             fragmentation. */
308          if (prev == -1)
309             freeslot_chain = next_freeslot;
310          else
311             put_slotindex(prev, next_freeslot);
312          put_refcount(ix, 0);
313          put_slotsize(ix, size);
314          VG_(strcpy)(segnames + ix, name);
315          ++num_segnames;
316          return ix;
317       }
318    }
319 
320    /* We need to add a new name. */
321 
322    /* Note, that we need at least two bytes in the payload. The reason is
323       that the payload area will be used to store the size of the slot when
324       the slot is on the freelist. */
325    if (len == 0) len = 1;
326 
327    /* Is there enough room in the string table? The OVERHEAD is for the
328       sentinel following the payload of new slot. */
329    SizeT need = len + 1 + overhead;
330    if (need > (sizeof segnames) - segnames_used) {
331       return -1;
332    }
333 
334    ++num_segnames;
335    ++num_slots;
336 
337    /* copy it in */
338    ix = segnames_used;
339    put_refcount(ix, 0);
340    put_slotsize(ix, len + 1);
341    VG_(strcpy)(segnames + ix, name);
342    segnames_used += need;
343 
344    /* Add sentinel at end of segment name list */
345    put_sentinel(segnames_used);
346 
347    return ix;
348 }
349 
350 /* Debugging output */
351 void
ML_(am_show_segnames)352 ML_(am_show_segnames)(Int logLevel, const HChar *prefix)
353 {
354    UInt size, ix, i;
355 
356    VG_(debugLog)(logLevel, "aspacem", "%u segment names in %u slots\n",
357                  num_segnames, num_slots);
358 
359    if (freeslot_chain == end_of_chain)
360       VG_(debugLog)(logLevel, "aspacem", "freelist is empty\n");
361    else
362       VG_(debugLog)(logLevel, "aspacem", "freelist begins at %u\n",
363                     freeslot_chain);
364    for (i = 0, ix = overhead; (size = get_slotsize(ix)) != 0;
365         ix += size + overhead, ++i) {
366       if (is_freeslot(ix))
367          VG_(debugLog)(logLevel, "aspacem",
368                        "(%u,%u,0) [free slot: size=%u  next=%u]\n", i, ix,
369                        get_slotsize(ix), get_slotindex(ix));
370       else
371          VG_(debugLog)(logLevel, "aspacem",
372                        "(%u,%u,%u) %s\n", i, ix, get_refcount(ix),
373                        segnames + ix);
374    }
375 }
376 
377 /* Returns a sequence number for the fnIdx position in segnames.
378    Used in aspacemgr debug output to associate a segment with
379    a segment name. */
380 Int
ML_(am_segname_get_seqnr)381 ML_(am_segname_get_seqnr)(Int fnIdx)
382 {
383    SizeT ix, size;
384    Int seqnr = -1;
385 
386    if (fnIdx == -1) return -1;   // shortcut
387 
388    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
389       seqnr++;
390       if (ix == fnIdx)
391          return seqnr;
392    }
393 
394    // We should always find the given index; something's busted
395    aspacem_assert(0);
396    return -1;
397 }
398 
399 /* Initialise the string table for segment names. It contains an empty
400    string which is not referenced. */
401 void
ML_(am_segnames_init)402 ML_(am_segnames_init)(void)
403 {
404    aspacem_assert(sizeof segnames >= overhead);
405 
406    segnames_used = overhead;
407    put_sentinel(segnames_used);
408 }
409 
410 /* Increase reference count of segment name identified by IX. */
411 void
ML_(am_inc_refcount)412 ML_(am_inc_refcount)(Int ix)
413 {
414    if (ix != -1)
415       inc_refcount(ix);
416 }
417 
418 /* Decrease reference count of segment name identified by IX. */
419 void
ML_(am_dec_refcount)420 ML_(am_dec_refcount)(Int ix)
421 {
422    if (ix != -1)
423       dec_refcount(ix);
424 }
425 
426 Bool
ML_(am_sane_segname)427 ML_(am_sane_segname)(Int ix)
428 {
429    return ix == -1 || (ix >= overhead && ix < segnames_used);
430 }
431 
432 const HChar *
ML_(am_get_segname)433 ML_(am_get_segname)(Int ix)
434 {
435    return (ix == -1) ? NULL : segnames + ix;
436 }
437 
438 /*--------------------------------------------------------------------*/
439 /*--- end                                                          ---*/
440 /*--------------------------------------------------------------------*/
441