1
2 /*--------------------------------------------------------------------*/
3 /*--- LibHB: a library for implementing and checking ---*/
4 /*--- the happens-before relationship in concurrent programs. ---*/
5 /*--- libhb_main.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
11
12 Copyright (C) 2008-2017 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31 */
32
33 #include "pub_tool_basics.h"
34 #include "pub_tool_poolalloc.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcbase.h"
37 #include "pub_tool_libcprint.h"
38 #include "pub_tool_machine.h"
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_wordfm.h"
41 #include "pub_tool_hashtable.h"
42 #include "pub_tool_xarray.h"
43 #include "pub_tool_oset.h"
44 #include "pub_tool_threadstate.h"
45 #include "pub_tool_aspacemgr.h"
46 #include "pub_tool_stacktrace.h"
47 #include "pub_tool_execontext.h"
48 #include "pub_tool_errormgr.h"
49 #include "pub_tool_debuginfo.h"
50 #include "pub_tool_gdbserver.h"
51 #include "pub_tool_options.h" // VG_(clo_stats)
52 #include "hg_basics.h"
53 #include "hg_wordset.h"
54 #include "hg_lock_n_thread.h"
55 #include "hg_errors.h"
56
57 #include "libhb.h"
58
59
60 /////////////////////////////////////////////////////////////////
61 /////////////////////////////////////////////////////////////////
62 // //
63 // Debugging #defines //
64 // //
65 /////////////////////////////////////////////////////////////////
66 /////////////////////////////////////////////////////////////////
67
68 /* Check the sanity of shadow values in the core memory state
69 machine. Change #if 0 to #if 1 to enable this. */
70 #if 0
71 # define CHECK_MSM 1
72 #else
73 # define CHECK_MSM 0
74 #endif
75
76
77 /* Check sanity (reference counts, etc) in the conflicting access
78 machinery. Change #if 0 to #if 1 to enable this. */
79 #if 0
80 # define CHECK_CEM 1
81 #else
82 # define CHECK_CEM 0
83 #endif
84
85
86 /* Check sanity in the compressed shadow memory machinery,
87 particularly in its caching innards. Unfortunately there's no
88 almost-zero-cost way to make them selectable at run time. Hence
89 set the #if 0 to #if 1 and rebuild if you want them. */
90 #if 0
91 # define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */
92 # define inline __attribute__((noinline))
93 /* probably want to ditch -fomit-frame-pointer too */
94 #else
95 # define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
96 #endif
97
98 /* Define to 1 to activate tracing cached rcec. */
99 #define DEBUG_CACHED_RCEC 0
100
101 /////////////////////////////////////////////////////////////////
102 /////////////////////////////////////////////////////////////////
103 // //
104 // data decls: VtsID //
105 // //
106 /////////////////////////////////////////////////////////////////
107 /////////////////////////////////////////////////////////////////
108
109 /* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30
110 bits, since they have to be packed into the lowest 30 bits of an
111 SVal. */
112 typedef UInt VtsID;
113 #define VtsID_INVALID 0xFFFFFFFF
114
115
116
117 /////////////////////////////////////////////////////////////////
118 /////////////////////////////////////////////////////////////////
119 // //
120 // data decls: SVal //
121 // //
122 /////////////////////////////////////////////////////////////////
123 /////////////////////////////////////////////////////////////////
124
125 typedef ULong SVal;
126
127 /* This value has special significance to the implementation, and callers
128 may not store it in the shadow memory. */
129 #define SVal_INVALID (3ULL << 62)
130
131 /* This is the default value for shadow memory. Initially the shadow
132 memory contains no accessible areas and so all reads produce this
133 value. TODO: make this caller-defineable. */
134 #define SVal_NOACCESS (2ULL << 62)
135
136
137
138 /////////////////////////////////////////////////////////////////
139 /////////////////////////////////////////////////////////////////
140 // //
141 // data decls: ScalarTS //
142 // //
143 /////////////////////////////////////////////////////////////////
144 /////////////////////////////////////////////////////////////////
145
146 /* Scalar Timestamp. We have to store a lot of these, so there is
147 some effort to make them as small as possible. Logically they are
148 a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
149 We pack it into 64 bits by representing the Thr* using a ThrID, a
150 small integer (18 bits), and a 46 bit integer for the timestamp
151 number. The 46/18 split is arbitrary, but has the effect that
152 Helgrind can only handle programs that create 2^18 or fewer threads
153 over their entire lifetime, and have no more than 2^46 timestamp
154 ticks (synchronisation operations on the same thread).
155
156 This doesn't seem like much of a limitation. 2^46 ticks is
157 7.06e+13, and if each tick (optimistically) takes the machine 1000
158 cycles to process, then the minimum time to process that many ticks
159 at a clock rate of 5 GHz is 162.9 days. And that's doing nothing
160 but VTS ticks, which isn't realistic.
161
162 NB1: SCALARTS_N_THRBITS must be 27 or lower. The obvious limit is
163 32 since a ThrID is a UInt. 27 comes from the fact that
164 'Thr_n_RCEC', which records information about old accesses, packs
165 in tsw not only a ThrID but also minimum 4+1 other bits (access size
166 and writeness) in a UInt, hence limiting size to 32-(4+1) == 27.
167
168 NB2: thrid values are issued upwards from 1024, and values less
169 than that aren't valid. This isn't per se necessary (any order
170 will do, so long as they are unique), but it does help ensure they
171 are less likely to get confused with the various other kinds of
172 small-integer thread ids drifting around (eg, TId).
173 So, SCALARTS_N_THRBITS must be 11 or more.
174 See also NB5.
175
176 NB3: this probably also relies on the fact that Thr's are never
177 deallocated -- they exist forever. Hence the 1-1 mapping from
178 Thr's to thrid values (set up in Thr__new) persists forever.
179
180 NB4: temp_max_sized_VTS is allocated at startup and never freed.
181 It is a maximum sized VTS, so has (1 << SCALARTS_N_TYMBITS)
182 ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without
183 making the memory use for this go sky-high. With
184 SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
185 like an OK tradeoff. If more than 256k threads need to be
186 supported, we could change SCALARTS_N_THRBITS to 20, which would
187 facilitate supporting 1 million threads at the cost of 8MB storage
188 for temp_max_sized_VTS.
189
190 NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
191 ThrID == 0 to denote an empty Thr_n_RCEC record. So ThrID == 0
192 must never be a valid ThrID. Given NB2 that's OK.
193 */
194 #define SCALARTS_N_THRBITS 18 /* valid range: 11 to 27 inclusive,
195 See NB1 and NB2 above. */
196
197 #define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;  /* issuing thread; valid ids start
                                            at 1024, see NB2 above */
      ULong tym   : SCALARTS_N_TYMBITS;  /* that thread's timestamp value */
   }
   ScalarTS;
204
205 #define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
206
207
208
209 /////////////////////////////////////////////////////////////////
210 /////////////////////////////////////////////////////////////////
211 // //
212 // data decls: Filter //
213 // //
214 /////////////////////////////////////////////////////////////////
215 /////////////////////////////////////////////////////////////////
216
217 // baseline: 5, 9
218 #define FI_LINE_SZB_LOG2 5
219 #define FI_NUM_LINES_LOG2 10
220
221 #define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
222 #define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
223
224 #define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
225 #define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
226
227 #define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
228 & (Addr)(FI_NUM_LINES-1) )
229
230
231 /* In the lines, each 8 bytes are treated individually, and are mapped
232 to a UShort. Regardless of endianness of the underlying machine,
233 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
234 the highest address.
235
236 Of each bit pair, the higher numbered bit is set if a R has been
237 seen, so the actual layout is:
238
239 15 14 ... 01 00
240
241 R W for addr+7 ... R W for addr+0
242
243 So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
244 */
245
246 /* tags are separated from lines. tags are Addrs and are
247 the base address of the line. */
248 typedef
249 struct {
250 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
251 }
252 FiLine;
253
254 typedef
255 struct {
256 Addr tags[FI_NUM_LINES];
257 FiLine lines[FI_NUM_LINES];
258 }
259 Filter;
260
261
262
263 /////////////////////////////////////////////////////////////////
264 /////////////////////////////////////////////////////////////////
265 // //
266 // data decls: Thr, ULong_n_EC //
267 // //
268 /////////////////////////////////////////////////////////////////
269 /////////////////////////////////////////////////////////////////
270
271 // Records stacks for H1 history mechanism (DRD-style)
272 typedef
273 struct { ULong ull; ExeContext* ec; }
274 ULong_n_EC;
275
276
277 /* How many of the above records to collect for each thread? Older
278 ones are dumped when we run out of space. 62.5k requires 1MB per
279 thread, since each ULong_n_EC record is 16 bytes long. When more
280 than N_KWs_N_STACKs_PER_THREAD are present, the older half are
281 deleted to make space. Hence in the worst case we will be able to
282 produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
283 Kw transitions (segments in this thread). For the current setting
284 that gives a guaranteed stack for at least the last 31.25k
285 segments. */
286 #define N_KWs_N_STACKs_PER_THREAD 62500
287
288
289 #define N_FRAMES 8
290 // (UInt) `echo "Reference Counted Execution Context" | md5sum`
291 #define RCEC_MAGIC 0xab88abb2UL
292
293 /* RCEC usage is commented more in details in the section 'Change-event map2'
294 later in this file */
295 typedef
296 struct _RCEC {
297 UWord magic; /* sanity check only */
298 struct _RCEC* next;
299 UWord rc;
300 UWord rcX; /* used for crosschecking */
301 UWord frames_hash; /* hash of all the frames */
302 UWord frames[N_FRAMES];
303 }
304 RCEC;
305
306 struct _Thr {
307 /* Current VTSs for this thread. They change as we go along. viR
308 is the VTS to be used for reads, viW for writes. Usually they
309 are the same, but can differ when we deal with reader-writer
310 locks. It is always the case that
311 VtsID__cmpLEQ(viW,viR) == True
312 that is, viW must be the same, or lagging behind, viR. */
313 VtsID viR;
314 VtsID viW;
315
316 /* Is initially False, and is set to True after the thread really
317 has done a low-level exit. When True, we expect to never see
318 any more memory references done by this thread. */
319 Bool llexit_done;
320
321 /* Is initially False, and is set to True after the thread has been
322 joined with (reaped by some other thread). After this point, we
323 do not expect to see any uses of .viR or .viW, so it is safe to
324 set them to VtsID_INVALID. */
325 Bool joinedwith_done;
326
327 /* A small integer giving a unique identity to this Thr. See
328 comments on the definition of ScalarTS for details. */
329 ThrID thrid : SCALARTS_N_THRBITS;
330
331 /* A filter that removes references for which we believe that
332 msmcread/msmcwrite will not change the state, nor report a
333 race. */
334 Filter* filter;
335
336 /* A pointer back to the top level Thread structure. There is a
337 1-1 mapping between Thread and Thr structures -- each Thr points
338 at its corresponding Thread, and vice versa. Really, Thr and
339 Thread should be merged into a single structure. */
340 Thread* hgthread;
341
342 /* cached_rcec maintains the last RCEC that was retrieved for this thread. */
343 RCEC cached_rcec; // cached_rcec value, not ref-counted.
344 /* The shadow register vex_shadow1 SP register (SP_s1) is used to maintain
345 the validity of the cached rcec.
346 If SP_s1 is 0, then the cached rcec is invalid (cannot be used).
347 If SP_S1 is != 0, then the cached rcec is valid. The valid cached rcec
348 can be used to generate a new RCEC by changing just the last frame. */
349
350 /* The ULongs (scalar Kws) in this accumulate in strictly
351 increasing order, without duplicates. This is important because
352 we need to be able to find a given scalar Kw in this array
353 later, by binary search. */
354 XArray* /* ULong_n_EC */ local_Kws_n_stacks;
355 };
356
357
358
359 /////////////////////////////////////////////////////////////////
360 /////////////////////////////////////////////////////////////////
361 // //
362 // data decls: SO //
363 // //
364 /////////////////////////////////////////////////////////////////
365 /////////////////////////////////////////////////////////////////
366
367 // (UInt) `echo "Synchronisation object" | md5sum`
368 #define SO_MAGIC 0x56b3c5b0U
369
struct _SO {
   struct _SO* admin_prev;  /* links in the doubly-linked list of all SOs
                               (rooted at admin_SO, below) */
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt magic;  /* must be SO_MAGIC; sanity check only */
};
377
378
379
380 /////////////////////////////////////////////////////////////////
381 /////////////////////////////////////////////////////////////////
382 // //
383 // Forward declarations //
384 // //
385 /////////////////////////////////////////////////////////////////
386 /////////////////////////////////////////////////////////////////
387
388 /* fwds for
389 Globals needed by other parts of the library. These are set
390 once at startup and then never changed. */
391 static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
392 static ExeContext* (*main_get_EC)( Thr* ) = NULL;
393
394 /* misc fn and data fwdses */
395 static void VtsID__rcinc ( VtsID ii );
396 static void VtsID__rcdec ( VtsID ii );
397
398 static inline Bool SVal__isC ( SVal s );
399 static inline VtsID SVal__unC_Rmin ( SVal s );
400 static inline VtsID SVal__unC_Wmin ( SVal s );
401 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
402 static inline void SVal__rcinc ( SVal s );
403 static inline void SVal__rcdec ( SVal s );
404 /* SVal in LineZ are used to store various pointers. */
405 static inline void *SVal2Ptr (SVal s);
406 static inline SVal Ptr2SVal (void* ptr);
407
408 /* A double linked list of all the SO's. */
409 SO* admin_SO;
410
411
412
413 /////////////////////////////////////////////////////////////////
414 /////////////////////////////////////////////////////////////////
415 // //
416 // SECTION BEGIN compressed shadow memory //
417 // //
418 /////////////////////////////////////////////////////////////////
419 /////////////////////////////////////////////////////////////////
420
421 #ifndef __HB_ZSM_H
422 #define __HB_ZSM_H
423
424 /* Initialise the library. Once initialised, it will (or may) call
425 SVal__rcinc and SVal__rcdec in response to all the calls below, in order to
426 allow the user to do reference counting on the SVals stored herein.
427 It is important to understand, however, that due to internal
428 caching, the reference counts are in general inaccurate, and can be
429 both above or below the true reference count for an item. In
430 particular, the library may indicate that the reference count for
431 an item is zero, when in fact it is not.
432
433 To make the reference counting exact and therefore non-pointless,
434 call zsm_flush_cache. Immediately after it returns, the reference
435 counts for all items, as deduced by the caller by observing calls
436 to SVal__rcinc and SVal__rcdec, will be correct, and so any items with a
437 zero reference count may be freed (or at least considered to be
438 unreferenced by this library).
439 */
440 static void zsm_init ( void );
441
442 static void zsm_sset_range ( Addr, SizeT, SVal );
443 static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
444 static void zsm_scopy_range ( Addr, Addr, SizeT );
445 static void zsm_flush_cache ( void );
446
447 #endif /* ! __HB_ZSM_H */
448
449
/* Round a up to the next multiple of N.  N must be a power of 2.
   NOTE: arguments are fully parenthesised so that expression
   arguments (e.g. a shift, as in ROUNDUP(x, 1 << k)) expand
   correctly; the unparenthesised form mis-associates because
   '+' binds tighter than '<<'. */
#define ROUNDUP(a, N) (((a) + (N) - 1) & ~((N) - 1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N) ((a) & ~((N) - 1))
454
455 /* True if a belongs in range [start, start + szB[
456 (i.e. start + szB is excluded). */
address_in_range(Addr a,Addr start,SizeT szB)457 static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
458 {
459 /* Checking start <= a && a < start + szB.
460 As start and a are unsigned addresses, the condition can
461 be simplified. */
462 if (CHECK_ZSM)
463 tl_assert ((a - start < szB)
464 == (start <= a
465 && a < start + szB));
466 return a - start < szB;
467 }
468
469 /* ------ CacheLine ------ */
470
471 #define N_LINE_BITS 6 /* must be >= 3 */
472 #define N_LINE_ARANGE (1 << N_LINE_BITS)
473 #define N_LINE_TREES (N_LINE_ARANGE >> 3)
474
475 typedef
476 struct {
477 UShort descrs[N_LINE_TREES];
478 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
479 }
480 CacheLine;
481
482 #define TREE_DESCR_16_0 (1<<0)
483 #define TREE_DESCR_32_0 (1<<1)
484 #define TREE_DESCR_16_1 (1<<2)
485 #define TREE_DESCR_64 (1<<3)
486 #define TREE_DESCR_16_2 (1<<4)
487 #define TREE_DESCR_32_1 (1<<5)
488 #define TREE_DESCR_16_3 (1<<6)
489 #define TREE_DESCR_8_0 (1<<7)
490 #define TREE_DESCR_8_1 (1<<8)
491 #define TREE_DESCR_8_2 (1<<9)
492 #define TREE_DESCR_8_3 (1<<10)
493 #define TREE_DESCR_8_4 (1<<11)
494 #define TREE_DESCR_8_5 (1<<12)
495 #define TREE_DESCR_8_6 (1<<13)
496 #define TREE_DESCR_8_7 (1<<14)
497 #define TREE_DESCR_DTY (1<<15)
498
499 typedef
500 struct {
501 SVal dict[4]; /* can represent up to 4 diff values in the line */
502 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
503 dict indexes */
504 /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
505 LineF to use, and dict[2..] are also SVal_INVALID. */
506 }
507 LineZ; /* compressed rep for a cache line */
508
509 /* LineZ.dict[1] is used to store various pointers:
510 * In the first lineZ of a free SecMap, it points to the next free SecMap.
511 * In a lineZ for which we need to use a lineF, it points to the lineF. */
512
513
514 typedef
515 struct {
516 SVal w64s[N_LINE_ARANGE];
517 }
518 LineF; /* full rep for a cache line */
519
520 /* We use a pool allocator for LineF, as LineF is relatively small,
521 and we will often alloc/release such lines. */
522 static PoolAlloc* LineF_pool_allocator;
523
524 /* SVal in a lineZ are used to store various pointers.
525 Below are conversion functions to support that. */
/* Fetch the LineF pointer stashed in a LineZ's dict[1].  Only valid
   when the LineZ is in "full rep" mode, i.e. dict[0] == SVal_INVALID
   (see the LineZ comments above). */
static inline LineF *LineF_Ptr (LineZ *lineZ)
{
   tl_assert(lineZ->dict[0] == SVal_INVALID);
   return SVal2Ptr (lineZ->dict[1]);
}
531
532 /* Shadow memory.
533 Primary map is a WordFM Addr SecMap*.
534 SecMaps cover some page-size-ish section of address space and hold
535 a compressed representation.
536 CacheLine-sized chunks of SecMaps are copied into a Cache, being
537 decompressed when moved into the cache and recompressed on the
538 way out. Because of this, the cache must operate as a writeback
539 cache, not a writethrough one.
540
541 Each SecMap must hold a power-of-2 number of CacheLines. Hence
542 N_SECMAP_BITS must >= N_LINE_BITS.
543 */
544 #define N_SECMAP_BITS 13
545 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
546
547 // # CacheLines held by a SecMap
548 #define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
549
550 /* The data in the SecMap is held in the array of LineZs. Each LineZ
551 either carries the required data directly, in a compressed
552 representation, or it holds (in .dict[1]) a pointer to a LineF
553 that holds the full representation.
554
555 As each in-use LineF is referred to by exactly one LineZ,
556 the number of .linesZ[] that refer to a lineF should equal
557 the number of used lineF.
558
559 RC obligations: the RCs presented to the user include exactly
560 the values in:
561 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
562 * F reps that are in use
563
564 Hence the following actions at the following transitions are required:
565
566 F rep: alloc'd -> freed -- rcdec_LineF
567 F rep: -> alloc'd -- rcinc_LineF
568 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
569 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
570 */
571
572 typedef
573 struct {
574 UInt magic;
575 LineZ linesZ[N_SECMAP_ZLINES];
576 }
577 SecMap;
578
579 #define SecMap_MAGIC 0x571e58cbU
580
581 // (UInt) `echo "Free SecMap" | md5sum`
582 #define SecMap_free_MAGIC 0x5a977f30U
583
584 __attribute__((unused))
is_sane_SecMap(SecMap * sm)585 static inline Bool is_sane_SecMap ( SecMap* sm ) {
586 return sm != NULL && sm->magic == SecMap_MAGIC;
587 }
588
589 /* ------ Cache ------ */
590
591 #define N_WAY_BITS 16
592 #define N_WAY_NENT (1 << N_WAY_BITS)
593
594 /* Each tag is the address of the associated CacheLine, rounded down
595 to a CacheLine address boundary. A CacheLine size must be a power
596 of 2 and must be 8 or more. Hence an easy way to initialise the
597 cache so it is empty is to set all the tag values to any value % 8
598 != 0, eg 1. This means all queries in the cache initially miss.
599 It does however require us to detect and not writeback, any line
600 with a bogus tag. */
601 typedef
602 struct {
603 CacheLine lyns0[N_WAY_NENT];
604 Addr tags0[N_WAY_NENT];
605 }
606 Cache;
607
is_valid_scache_tag(Addr tag)608 static inline Bool is_valid_scache_tag ( Addr tag ) {
609 /* a valid tag should be naturally aligned to the start of
610 a CacheLine. */
611 return 0 == (tag & (N_LINE_ARANGE - 1));
612 }
613
614
615 /* --------- Primary data structures --------- */
616
617 /* Shadow memory primary map */
618 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
619 static Cache cache_shmem;
620
621
622 static UWord stats__secmaps_search = 0; // # SM finds
623 static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
624 static UWord stats__secmaps_allocd = 0; // # SecMaps issued
625 static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
626 static UWord stats__secmaps_scanGC = 0; // # nr of scan GC done.
627 static UWord stats__secmaps_scanGCed = 0; // # SecMaps GC-ed via scan
628 static UWord stats__secmaps_ssetGCed = 0; // # SecMaps GC-ed via setnoaccess
629 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
630 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
631 static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
632 static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
633 static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
634 static UWord stats__cache_F_fetches = 0; // # F lines fetched
635 static UWord stats__cache_F_wbacks = 0; // # F lines written back
636 static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
637 static UWord stats__cache_totrefs = 0; // # total accesses
638 static UWord stats__cache_totmisses = 0; // # misses
639 static ULong stats__cache_make_New_arange = 0; // total arange made New
640 static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
641 static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
642 static UWord stats__cline_cread64s = 0; // # calls to s_m_read64
643 static UWord stats__cline_cread32s = 0; // # calls to s_m_read32
644 static UWord stats__cline_cread16s = 0; // # calls to s_m_read16
645 static UWord stats__cline_cread08s = 0; // # calls to s_m_read8
646 static UWord stats__cline_cwrite64s = 0; // # calls to s_m_write64
647 static UWord stats__cline_cwrite32s = 0; // # calls to s_m_write32
648 static UWord stats__cline_cwrite16s = 0; // # calls to s_m_write16
649 static UWord stats__cline_cwrite08s = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s = 0; // # 8-bit shadow reads
static UWord stats__cline_swrite08s = 0; // # 8-bit shadow writes
static UWord stats__cline_swrite16s = 0; // # 16-bit shadow writes
static UWord stats__cline_swrite32s = 0; // # 32-bit shadow writes
static UWord stats__cline_swrite64s = 0; // # 64-bit shadow writes
static UWord stats__cline_scopy08s = 0; // # 8-bit shadow copies
656 static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
657 static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
658 static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
659 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
660 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
661 static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
662 static UWord stats__vts__tick = 0; // # calls to VTS__tick
663 static UWord stats__vts__join = 0; // # calls to VTS__join
664 static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ
665 static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural
666 static UWord stats__vts_tab_GC = 0; // # nr of vts_tab GC
667 static UWord stats__vts_pruning = 0; // # nr of vts pruning
668
669 // # calls to VTS__cmp_structural w/ slow case
670 static UWord stats__vts__cmp_structural_slow = 0;
671
672 // # calls to VTS__indexAt_SLOW
673 static UWord stats__vts__indexat_slow = 0;
674
675 // # calls to vts_set__find__or__clone_and_add
676 static UWord stats__vts_set__focaa = 0;
677
678 // # calls to vts_set__find__or__clone_and_add that lead to an
679 // allocation
680 static UWord stats__vts_set__focaa_a = 0;
681
682
/* Round an address down to the base of its containing SecMap.
   SecMaps cover aligned N_SECMAP_ARANGE-sized chunks of address
   space, so this just clears the low bits. */
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
/* Byte offset of an address within its containing SecMap. */
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}
689
690
691 /*----------------------------------------------------------------*/
692 /*--- map_shmem :: WordFM Addr SecMap ---*/
693 /*--- shadow memory (low level handlers) (shmem__* fns) ---*/
694 /*----------------------------------------------------------------*/
695
696 /*--------------- SecMap allocation --------------- */
697
698 static HChar* shmem__bigchunk_next = NULL;
699 static HChar* shmem__bigchunk_end1 = NULL;
700
/* Bump-pointer allocator used for SecMap storage.  Hands out
   16-byte-aligned pieces of a large shadow-memory chunk, obtaining a
   fresh chunk from the address space manager when the current one
   runs out.  Memory is never returned; failure to obtain a new chunk
   terminates the run. */
static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;  /* 4MB per chunk */
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);  /* keep every returned pointer 16-aligned */
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      /* Current chunk can't satisfy the request: abandon its tail
         and start a fresh chunk. */
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}
725
726 /* SecMap changed to be fully SVal_NOACCESS are inserted in a list of
727 recycled SecMap. When a new SecMap is needed, a recycled SecMap
728 will be used in preference to allocating a new SecMap. */
729 /* We make a linked list of SecMap. The first LineZ is re-used to
730 implement the linked list. */
/* Returns the SecMap following sm in the free list, or NULL if sm is
   the last one.  sm must be on the free list (must carry the free
   magic).  The link field is the first LineZ's dict[1], per the
   re-use scheme described above. */
static inline SecMap *SecMap_freelist_next ( SecMap* sm )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   return SVal2Ptr (sm->linesZ[0].dict[1]);
}
/* Stores next as sm's successor on the free list, using the first
   LineZ's dict[1] as the link field.  Both SecMaps must already be
   marked free. */
static inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
   sm->linesZ[0].dict[1] = Ptr2SVal (next);
}
746
747 static SecMap *SecMap_freelist = NULL;
SecMap_freelist_length(void)748 static UWord SecMap_freelist_length(void)
749 {
750 SecMap *sm;
751 UWord n = 0;
752
753 sm = SecMap_freelist;
754 while (sm) {
755 n++;
756 sm = SecMap_freelist_next (sm);
757 }
758 return n;
759 }
760
/* Prepend sm to the free list.  The free magic is written first
   because set_SecMap_freelist_next asserts it. */
static void push_SecMap_on_freelist(SecMap* sm)
{
   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   sm->magic = SecMap_free_MAGIC;
   set_SecMap_freelist_next(sm, SecMap_freelist);
   SecMap_freelist = sm;
}
768 /* Returns a free SecMap if there is one.
769 Otherwise, returns NULL. */
pop_SecMap_from_freelist(void)770 static SecMap *pop_SecMap_from_freelist(void)
771 {
772 SecMap *sm;
773
774 sm = SecMap_freelist;
775 if (sm) {
776 tl_assert (sm->magic == SecMap_free_MAGIC);
777 SecMap_freelist = SecMap_freelist_next (sm);
778 if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
779 }
780 return sm;
781 }
782
shmem__alloc_or_recycle_SecMap(void)783 static SecMap* shmem__alloc_or_recycle_SecMap ( void )
784 {
785 Word i, j;
786 SecMap* sm = pop_SecMap_from_freelist();
787
788 if (!sm) {
789 sm = shmem__bigchunk_alloc( sizeof(SecMap) );
790 stats__secmaps_allocd++;
791 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
792 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
793 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
794 }
795 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
796 tl_assert(sm);
797 sm->magic = SecMap_MAGIC;
798 for (i = 0; i < N_SECMAP_ZLINES; i++) {
799 sm->linesZ[i].dict[0] = SVal_NOACCESS;
800 sm->linesZ[i].dict[1] = SVal_INVALID;
801 sm->linesZ[i].dict[2] = SVal_INVALID;
802 sm->linesZ[i].dict[3] = SVal_INVALID;
803 for (j = 0; j < N_LINE_ARANGE/4; j++)
804 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
805 }
806 return sm;
807 }
808
809 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
810 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
811
/* Find the SecMap covering ga, or NULL if there is none.  A 3-entry
   move-towards-front cache (smCache) sits in front of the WordFM
   lookup: a hit in entry 1 or 2 promotes that entry one slot, and a
   successful slow lookup shifts the cache down and installs the new
   entry at the front. */
static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm = NULL;
   Addr gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      /* hit in entry 1: swap it towards the front */
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      /* hit in entry 2: swap it towards the front */
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      /* shift the cache down and install the new entry at the front */
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}
846
/* Scan the SecMap and count the SecMap that can be GC-ed.
   If really, really does the GC of the SecMap. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
/* Allocation count at which the next GC should be triggered; the
   threshold is recomputed at the bottom of shmem__SecMap_do_GC. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;   /* nr of SecMaps visited */
   UWord ok_GCed  = 0;   /* nr found reclaimable (and, if 'really', reclaimed) */

   /* First invalidate the smCache */
   /* gaKey == 1 marks an unused slot; see the smCache declaration. */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      UWord   n_linesF = 0;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool    ok_to_GC = True;

      examined++;

      /* Deal with the LineZs and the possible LineF of a LineZ. */
      /* The SecMap is reclaimable only if every line denotes nothing
         but SVal_NOACCESS. */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] != SVal_INVALID) {
            /* Z rep.  NOTE(review): dict[1..3] are only checked with
               !SVal__isC -- presumably non-C values there do not pin
               the SecMap; confirm against the SVal semantics. */
            ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
               && !SVal__isC (lineZ->dict[1])
               && !SVal__isC (lineZ->dict[2])
               && !SVal__isC (lineZ->dict[3]);
         } else {
            /* F rep: all N_LINE_ARANGE SVals must be NOACCESS. */
            LineF *lineF = LineF_Ptr(lineZ);
            n_linesF++;
            for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
               ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
         }
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
         SecMap *fm_sm;
         Addr    fm_gaKey;
         /* We cannot remove a SecMap from map_shmem while iterating.
            So, stop iteration, remove from map_shmem, recreate the iteration
            on the next SecMap. */
         VG_(doneIterFM) ( map_shmem );
         /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
            We just need to free the lineF referenced by the linesZ. */
         if (n_linesF > 0) {
            for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
               LineZ* lineZ = &sm->linesZ[i];
               if (lineZ->dict[0] == SVal_INVALID) {
                  VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
                  n_linesF--;
               }
            }
         }
         if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
            tl_assert (0);
         stats__secmaps_in_map_shmem--;
         tl_assert (gaKey == fm_gaKey);
         tl_assert (sm == fm_sm);
         stats__secmaps_scanGCed++;
         push_SecMap_on_freelist (sm);
         /* Resume iteration just past the SecMap we deleted. */
         VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%. We then allow to alloc 10%
         more before GCing. This avoids doing a lot of costly GC
         for the worst case : the 'growing phase' of an application
         that allocates a lot of memory.
         The worst case can be reproduced e.g. by
           perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         that allocates around 30Gb of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                   "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}
949
shmem__find_or_alloc_SecMap(Addr ga)950 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
951 {
952 SecMap* sm = shmem__find_SecMap ( ga );
953 if (LIKELY(sm)) {
954 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
955 return sm;
956 } else {
957 /* create a new one */
958 Addr gaKey = shmem__round_to_SecMap_base(ga);
959 sm = shmem__alloc_or_recycle_SecMap();
960 tl_assert(sm);
961 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
962 stats__secmaps_in_map_shmem++;
963 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
964 return sm;
965 }
966 }
967
968 /* Returns the nr of linesF which are in use. Note: this is scanning
969 the secmap wordFM. So, this is to be used for statistics only. */
970 __attribute__((noinline))
shmem__SecMap_used_linesF(void)971 static UWord shmem__SecMap_used_linesF(void)
972 {
973 UWord secmapW = 0;
974 Addr gaKey;
975 UWord inUse = 0;
976
977 VG_(initIterFM)( map_shmem );
978 while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
979 UWord i;
980 SecMap* sm = (SecMap*)secmapW;
981 tl_assert(sm->magic == SecMap_MAGIC);
982
983 for (i = 0; i < N_SECMAP_ZLINES; i++) {
984 LineZ* lineZ = &sm->linesZ[i];
985 if (lineZ->dict[0] == SVal_INVALID)
986 inUse++;
987 }
988 }
989 VG_(doneIterFM)( map_shmem );
990
991 return inUse;
992 }
993
994 /* ------------ LineF and LineZ related ------------ */
995
/* Bump the reference count of every SVal held in a full-rep line. */
static void rcinc_LineF ( LineF* lineF ) {
   UWord k;
   for (k = 0; k < N_LINE_ARANGE; k++)
      SVal__rcinc(lineF->w64s[k]);
}
1001
/* Drop the reference count of every SVal held in a full-rep line. */
static void rcdec_LineF ( LineF* lineF ) {
   UWord k;
   for (k = 0; k < N_LINE_ARANGE; k++)
      SVal__rcdec(lineF->w64s[k]);
}
1007
/* Bump the refcount of each in-use dictionary entry of a Z-rep line.
   dict[0] must be valid; dict[1..3] are counted only when in use. */
static void rcinc_LineZ ( LineZ* lineZ ) {
   UWord d;
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   for (d = 1; d <= 3; d++)
      if (lineZ->dict[d] != SVal_INVALID)
         SVal__rcinc(lineZ->dict[d]);
}
1015
/* Drop the refcount of each in-use dictionary entry of a Z-rep line.
   dict[0] must be valid; dict[1..3] are counted only when in use. */
static void rcdec_LineZ ( LineZ* lineZ ) {
   UWord d;
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   for (d = 1; d <= 3; d++)
      if (lineZ->dict[d] != SVal_INVALID)
         SVal__rcdec(lineZ->dict[d]);
}
1023
1024 inline
write_twobit_array(UChar * arr,UWord ix,UWord b2)1025 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
1026 Word bix, shft, mask, prep;
1027 tl_assert(ix >= 0);
1028 bix = ix >> 2;
1029 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1030 mask = 3 << shft;
1031 prep = b2 << shft;
1032 arr[bix] = (arr[bix] & ~mask) | prep;
1033 }
1034
1035 inline
read_twobit_array(UChar * arr,UWord ix)1036 static UWord read_twobit_array ( UChar* arr, UWord ix ) {
1037 Word bix, shft;
1038 tl_assert(ix >= 0);
1039 bix = ix >> 2;
1040 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1041 return (arr[bix] >> shft) & 3;
1042 }
1043
/* We cache one free lineF, to avoid pool allocator calls.
   Measurement on firefox has shown that this avoids more than 90%
   of the PA calls.  Filled by clear_LineF_of_Z, drained by
   alloc_LineF_for_Z. */
static LineF *free_lineF = NULL;
1048
1049 /* Allocates a lineF for LineZ. Sets lineZ in a state indicating
1050 lineF has to be used. */
alloc_LineF_for_Z(LineZ * lineZ)1051 static inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
1052 {
1053 LineF *lineF;
1054
1055 tl_assert(lineZ->dict[0] == SVal_INVALID);
1056
1057 if (LIKELY(free_lineF)) {
1058 lineF = free_lineF;
1059 free_lineF = NULL;
1060 } else {
1061 lineF = VG_(allocEltPA) ( LineF_pool_allocator );
1062 }
1063 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1064 lineZ->dict[1] = Ptr2SVal (lineF);
1065
1066 return lineF;
1067 }
1068
1069 /* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
1070 back to its initial state SVal_NOACCESS (i.e. ready to be
1071 read or written just after SecMap allocation). */
clear_LineF_of_Z(LineZ * lineZ)1072 static inline void clear_LineF_of_Z (LineZ *lineZ)
1073 {
1074 LineF *lineF = LineF_Ptr(lineZ);
1075
1076 rcdec_LineF(lineF);
1077 if (UNLIKELY(free_lineF)) {
1078 VG_(freeEltPA)( LineF_pool_allocator, lineF );
1079 } else {
1080 free_lineF = lineF;
1081 }
1082 lineZ->dict[0] = SVal_NOACCESS;
1083 lineZ->dict[1] = SVal_INVALID;
1084 }
1085
1086 /* Given address 'tag', find either the Z or F line containing relevant
1087 data, so it can be read into the cache.
1088 */
find_ZF_for_reading(LineZ ** zp,LineF ** fp,Addr tag)1089 static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
1090 /*OUT*/LineF** fp, Addr tag ) {
1091 LineZ* lineZ;
1092 LineF* lineF;
1093 UWord zix;
1094 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
1095 UWord smoff = shmem__get_SecMap_offset(tag);
1096 /* since smoff is derived from a valid tag, it should be
1097 cacheline-aligned. */
1098 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1099 zix = smoff >> N_LINE_BITS;
1100 tl_assert(zix < N_SECMAP_ZLINES);
1101 lineZ = &sm->linesZ[zix];
1102 lineF = NULL;
1103 if (lineZ->dict[0] == SVal_INVALID) {
1104 lineF = LineF_Ptr (lineZ);
1105 lineZ = NULL;
1106 }
1107 *zp = lineZ;
1108 *fp = lineF;
1109 }
1110
1111 /* Given address 'tag', return the relevant SecMap and the index of
1112 the LineZ within it, in the expectation that the line is to be
1113 overwritten. Regardless of whether 'tag' is currently associated
1114 with a Z or F representation, to rcdec on the current
1115 representation, in recognition of the fact that the contents are
1116 just about to be overwritten. */
1117 static __attribute__((noinline))
find_Z_for_writing(SecMap ** smp,Word * zixp,Addr tag)1118 void find_Z_for_writing ( /*OUT*/SecMap** smp,
1119 /*OUT*/Word* zixp,
1120 Addr tag ) {
1121 LineZ* lineZ;
1122 UWord zix;
1123 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
1124 UWord smoff = shmem__get_SecMap_offset(tag);
1125 /* since smoff is derived from a valid tag, it should be
1126 cacheline-aligned. */
1127 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1128 zix = smoff >> N_LINE_BITS;
1129 tl_assert(zix < N_SECMAP_ZLINES);
1130 lineZ = &sm->linesZ[zix];
1131 /* re RCs, we are rcdec_LineZ/clear_LineF_of_Z this LineZ so that new data
1132 can be parked in it. Hence have to rcdec it accordingly. */
1133 /* If lineZ has an associated lineF, free it up. */
1134 if (lineZ->dict[0] == SVal_INVALID)
1135 clear_LineF_of_Z(lineZ);
1136 else
1137 rcdec_LineZ(lineZ);
1138 *smp = sm;
1139 *zixp = zix;
1140 }
1141
1142 /* ------------ CacheLine and implicit-tree related ------------ */
1143
1144 __attribute__((unused))
pp_CacheLine(CacheLine * cl)1145 static void pp_CacheLine ( CacheLine* cl ) {
1146 Word i;
1147 if (!cl) {
1148 VG_(printf)("%s","pp_CacheLine(NULL)\n");
1149 return;
1150 }
1151 for (i = 0; i < N_LINE_TREES; i++)
1152 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
1153 for (i = 0; i < N_LINE_ARANGE; i++)
1154 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
1155 }
1156
/* Map a tree descriptor (a bitmask over the 15 possible node
   positions of the implicit 8-byte tree) to an 8-bit mask saying
   which of tree[0..7] hold valid (non-SVal_INVALID) entries.
   Returns 0 when 'descr' does not describe any legal tree shape. */
static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0)  \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   /* Exhaustive table of every legal tree shape; anything else
      falls through to the 'invalid' default. */
   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8   8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16    8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8   8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16    8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8   16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16    16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8   16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16    16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8   32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16    32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
                   /* INVALID - any valid descr produces at least one
                      valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}
1271
1272 __attribute__((unused))
is_sane_Descr(UShort descr)1273 static Bool is_sane_Descr ( UShort descr ) {
1274 return descr_to_validbits(descr) != 0;
1275 }
1276
/* Render the 15 bits of a tree descriptor into 'dst' as '0'/'1'
   characters, grouped in the same order as the DESCR() macro in
   descr_to_validbits: the eight 8-bit leaves, then 16_3/32_1/16_2,
   then 64, then 16_1/32_0/16_0. */
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
/* Render 'byte' into dst as eight ASCII '0'/'1' characters, most
   significant bit first, NUL-terminated. */
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   Word k;
   for (k = 0; k < 8; k++)
      dst[k] = (byte & (0x80 >> k)) ? '1' : '0';
   dst[8] = 0;
}
1309
is_sane_Descr_and_Tree(UShort descr,SVal * tree)1310 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
1311 Word i;
1312 UChar validbits = descr_to_validbits(descr);
1313 HChar buf[128], buf2[128]; // large enough
1314 if (validbits == 0)
1315 goto bad;
1316 for (i = 0; i < 8; i++) {
1317 if (validbits & (1<<i)) {
1318 if (tree[i] == SVal_INVALID)
1319 goto bad;
1320 } else {
1321 if (tree[i] != SVal_INVALID)
1322 goto bad;
1323 }
1324 }
1325 return True;
1326 bad:
1327 sprintf_Descr( buf, descr );
1328 sprintf_Byte( buf2, validbits );
1329 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
1330 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
1331 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
1332 for (i = 0; i < 8; i++)
1333 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
1334 VG_(printf)("%s","}\n");
1335 return 0;
1336 }
1337
is_sane_CacheLine(CacheLine * cl)1338 static Bool is_sane_CacheLine ( CacheLine* cl )
1339 {
1340 Word tno, cloff;
1341
1342 if (!cl) goto bad;
1343
1344 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1345 UShort descr = cl->descrs[tno];
1346 SVal* tree = &cl->svals[cloff];
1347 if (!is_sane_Descr_and_Tree(descr, tree))
1348 goto bad;
1349 }
1350 tl_assert(cloff == N_LINE_ARANGE);
1351 return True;
1352 bad:
1353 pp_CacheLine(cl);
1354 return False;
1355 }
1356
/* Given tree[0..7] holding 8 valid (non-INVALID) leaf SVals, merge
   equal neighbours bottom-up: equal byte pairs collapse into 16-bit
   nodes, equal adjacent 16-bit nodes into 32-bit nodes, and equal
   32-bit nodes into one 64-bit node.  Slots made redundant are set
   to SVal_INVALID, and the returned descriptor records which nodes
   remain live. */
static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (CHECK_ZSM
       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   /* start with all eight byte leaves live */
   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
1415
1416 /* This takes a cacheline where all the data is at the leaves
1417 (w8[..]) and builds a correctly normalised tree. */
normalise_CacheLine(CacheLine * cl)1418 static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1419 {
1420 Word tno, cloff;
1421 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1422 SVal* tree = &cl->svals[cloff];
1423 cl->descrs[tno] = normalise_tree( tree );
1424 }
1425 tl_assert(cloff == N_LINE_ARANGE);
1426 if (CHECK_ZSM)
1427 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1428 stats__cline_normalises++;
1429 }
1430
1431
1432 typedef struct { UChar count; SVal sval; } CountedSVal;
1433
/* Run-length encode 'src' into dst[0 .. *dstUsedP-1]: for each
   (descr,tree) pair, emit one CountedSVal per node that the
   descriptor marks live, with 'count' equal to the node's width in
   bytes (1, 2, 4 or 8).  The emitted counts always sum to
   N_LINE_ARANGE.  'nDst' is the capacity of dst and must equal
   N_LINE_ARANGE. */
static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}
1487
/* Write the cacheline 'wix' to backing store.  Where it ends up is
   determined by its tag field.  The line is first run-length encoded;
   if the result uses at most 4 distinct SVals it is stored in the
   compressed Z representation, otherwise a full F line is attached. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   /* Run-length encode the line's shadow values into csvals. */
   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%ld ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* First attempt: build the compressed (Z) representation, which
      admits at most 4 distinct SVals, addressed per byte through a
      2-bit dictionary index. */
   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      /* store 'count' copies of dictionary index j */
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineF = alloc_LineF_for_Z (lineZ);
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         /* expand each run back into individual SVals */
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}
1628
/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      /* F rep: one SVal per byte, copy straight across. */
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      /* Z rep: decompress via the per-byte 2-bit dictionary index. */
      for (i = 0; i < N_LINE_ARANGE; i++) {
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
         cl->svals[i] = lineZ->dict[ix];
         if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}
1675
/* Invalidate the cachelines corresponding to the given range, which
   must start and end on a cacheline boundary.  Dirty data in the
   invalidated lines is discarded, not written back. */
static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
{
   Word wix;

   /* ga must be on a cacheline boundary. */
   tl_assert (is_valid_scache_tag (ga));
   /* szB must be a multiple of cacheline size. */
   tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));


   Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
   Word nwix = szB / N_LINE_ARANGE;

   if (nwix > N_WAY_NENT)
      nwix = N_WAY_NENT; // no need to check several times the same entry.

   /* Walk the direct-mapped slots the range's lines would occupy,
      wrapping at the end of the table; invalidate a slot only if the
      tag it currently holds actually lies inside [ga, ga+szB). */
   for (wix = 0; wix < nwix; wix++) {
      if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
         cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
      ga_ix++;
      if (UNLIKELY(ga_ix == N_WAY_NENT))
         ga_ix = 0;
   }
}
1702
1703
shmem__flush_and_invalidate_scache(void)1704 static void shmem__flush_and_invalidate_scache ( void ) {
1705 Word wix;
1706 Addr tag;
1707 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1708 tl_assert(!is_valid_scache_tag(1));
1709 for (wix = 0; wix < N_WAY_NENT; wix++) {
1710 tag = cache_shmem.tags0[wix];
1711 if (tag == 1/*INVALID*/) {
1712 /* already invalid; nothing to do */
1713 } else {
1714 tl_assert(is_valid_scache_tag(tag));
1715 cacheline_wback( wix );
1716 }
1717 cache_shmem.tags0[wix] = 1/*INVALID*/;
1718 }
1719 stats__cache_flushes_invals++;
1720 }
1721
1722
aligned16(Addr a)1723 static inline Bool aligned16 ( Addr a ) {
1724 return 0 == (a & 1);
1725 }
aligned32(Addr a)1726 static inline Bool aligned32 ( Addr a ) {
1727 return 0 == (a & 3);
1728 }
aligned64(Addr a)1729 static inline Bool aligned64 ( Addr a ) {
1730 return 0 == (a & 7);
1731 }
get_cacheline_offset(Addr a)1732 static inline UWord get_cacheline_offset ( Addr a ) {
1733 return (UWord)(a & (N_LINE_ARANGE - 1));
1734 }
cacheline_ROUNDUP(Addr a)1735 static inline Addr cacheline_ROUNDUP ( Addr a ) {
1736 return ROUNDUP(a, N_LINE_ARANGE);
1737 }
cacheline_ROUNDDN(Addr a)1738 static inline Addr cacheline_ROUNDDN ( Addr a ) {
1739 return ROUNDDN(a, N_LINE_ARANGE);
1740 }
get_treeno(Addr a)1741 static inline UWord get_treeno ( Addr a ) {
1742 return get_cacheline_offset(a) >> 3;
1743 }
get_tree_offset(Addr a)1744 static inline UWord get_tree_offset ( Addr a ) {
1745 return a & 7;
1746 }
1747
1748 static __attribute__((noinline))
1749 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
get_cacheline(Addr a)1750 static inline CacheLine* get_cacheline ( Addr a )
1751 {
1752 /* tag is 'a' with the in-line offset masked out,
1753 eg a[31]..a[4] 0000 */
1754 Addr tag = a & ~(N_LINE_ARANGE - 1);
1755 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1756 stats__cache_totrefs++;
1757 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1758 return &cache_shmem.lyns0[wix];
1759 } else {
1760 return get_cacheline_MISS( a );
1761 }
1762 }
1763
/* Slow path for get_cacheline: 'a' missed in the scache.  Write the
   currently resident line (if valid) back to the backing store, then
   fetch the line containing 'a' into the same way entry and return it.
   Kept out of line (noinline) so the hit path stays small. */
static __attribute__((noinline))
CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr* tag_old_p;
   Addr tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   /* Caller (get_cacheline) only sends us genuine misses. */
   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one -- tag must be installed before the fetch
      so the way entry is consistent afterwards */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}
1796
/* Split the 64-bit descriptor for the tree into two 32-bit ones,
   copying the parent SVal (tree[0]) into the newly exposed upper-half
   slot (tree[4]).  'toff' selects which 32-bit half the caller wants
   (0 or 4); both halves become valid either way.  Returns the updated
   descriptor. */
static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
1811
/* Split the 32-bit descriptor covering tree offset 'toff' into two
   16-bit descriptors, first pulling down from 64 bits if that half is
   not yet materialised.  The parent SVal is copied into the newly
   exposed child slot.  Returns the updated descriptor. */
static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            /* lower 32-bit half not valid yet; materialise it first */
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            /* upper 32-bit half not valid yet; materialise it first */
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
1838
/* Split the 16-bit descriptor covering tree offset 'toff' into two
   8-bit descriptors, recursively pulling down from 32/64 bits as
   needed.  The parent SVal is copied into the newly exposed child
   slot.  Returns the updated descriptor. */
static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
1883
1884
pullup_descr_to_16(UShort descr,UWord toff)1885 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1886 UShort mask;
1887 switch (toff) {
1888 case 0:
1889 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1890 tl_assert( (descr & mask) == mask );
1891 descr &= ~mask;
1892 descr |= TREE_DESCR_16_0;
1893 break;
1894 case 2:
1895 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1896 tl_assert( (descr & mask) == mask );
1897 descr &= ~mask;
1898 descr |= TREE_DESCR_16_1;
1899 break;
1900 case 4:
1901 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1902 tl_assert( (descr & mask) == mask );
1903 descr &= ~mask;
1904 descr |= TREE_DESCR_16_2;
1905 break;
1906 case 6:
1907 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1908 tl_assert( (descr & mask) == mask );
1909 descr &= ~mask;
1910 descr |= TREE_DESCR_16_3;
1911 break;
1912 default:
1913 tl_assert(0);
1914 }
1915 return descr;
1916 }
1917
/* Merge the two 16-bit descriptor bits in the half selected by 'toff'
   (0 = lower, 4 = upper) into the corresponding 32-bit bit, first
   pulling up any still-split 8-bit pairs.  Only the descriptor is
   changed. */
static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
1946
/* For a 32-bit access at tree offset 'toff' (0 or 4): is the valid
   value held at a coarser (64-bit) granularity above this node? */
static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}
1955
/* For a 16-bit access at tree offset 'toff' (0/2/4/6): is the valid
   value held at a finer (8-bit) granularity below this node, i.e. is
   either 8-bit child of this 16-bit slot marked valid? */
static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}
1970
1971 /* ------------ Cache management ------------ */
1972
/* Flush all dirty scache lines to the backing store and invalidate
   every cache entry.  Thin public wrapper over the shmem helper. */
static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}
1977
1978
/* One-time initialisation of the compressed shadow memory subsystem:
   create the address->SecMap mapping, invalidate the scache, and set
   up the pool allocator for full lines (LineF).  Must be called
   exactly once, before any shadow-memory access. */
static void zsm_init ( void )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   /* Invalidate all cache entries. */
   tl_assert(!is_valid_scache_tag(1));
   for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }

   LineF_pool_allocator = VG_(newPA) (
                             sizeof(LineF),
                             /* Nr elements/pool to fill a core arena block
                                taking some arena overhead into account. */
                             (4 * 1024 * 1024 - 200)/sizeof(LineF),
                             HG_(zalloc),
                             "libhb.LineF_storage.pool",
                             HG_(free)
                          );

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}
2008
2009 /////////////////////////////////////////////////////////////////
2010 /////////////////////////////////////////////////////////////////
2011 // //
2012 // SECTION END compressed shadow memory //
2013 // //
2014 /////////////////////////////////////////////////////////////////
2015 /////////////////////////////////////////////////////////////////
2016
2017
2018
2019 /////////////////////////////////////////////////////////////////
2020 /////////////////////////////////////////////////////////////////
2021 // //
2022 // SECTION BEGIN vts primitives //
2023 // //
2024 /////////////////////////////////////////////////////////////////
2025 /////////////////////////////////////////////////////////////////
2026
2027
2028 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
2029 being compact stand-ins for Thr*'s. Use these functions to map
2030 between them. */
2031 static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
2032 static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
2033
/* Abort the run with a user-visible message when a hard scalability
   limit is hit: either too many threads (due_to_nThrs == True) or too
   many scalar-timestamp values (synchronisation events).  Never
   returns; exits the whole process. */
__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
   if (due_to_nThrs) {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many threads.\n"
         "Sorry.  Helgrind can only handle programs that create\n"
         "%'llu or fewer threads over their entire lifetime.\n"
         "\n";
      /* 1024 low-numbered ThrIDs are reserved, hence the subtraction. */
      VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
   } else {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many\n"
         "synchronisation events.  Sorry. Helgrind can only handle\n"
         "programs which perform %'llu or fewer\n"
         "inter-thread synchronisation events (locks, unlocks, etc).\n"
         "\n";
      VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
   }
   VG_(exit)(1);
   /*NOTREACHED*/
   tl_assert(0); /*wtf?!*/
}
2059
2060
2061 /* The dead thread (ThrID, actually) tables. A thread may only be
2062 listed here if we have been notified thereof by libhb_async_exit.
2063 New entries are added at the end. The order isn't important, but
2064 the ThrID values must be unique.
2065 verydead_thread_table_not_pruned lists the identity of the threads
2066 that died since the previous round of pruning.
2067 Once pruning is done, these ThrID are added in verydead_thread_table.
2068 We don't actually need to keep the set of threads that have ever died --
2069 only the threads that have died since the previous round of
2070 pruning. But it's useful for sanity check purposes to keep the
2071 entire set, so we do. */
/* ThrIDs of threads declared very-dead since the last prune. */
static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
/* ThrIDs of all very-dead threads already covered by a prune. */
static XArray* /* of ThrID */ verydead_thread_table = NULL;
2074
2075 /* Arbitrary total ordering on ThrIDs. */
cmp__ThrID(const void * v1,const void * v2)2076 static Int cmp__ThrID ( const void* v1, const void* v2 ) {
2077 ThrID id1 = *(const ThrID*)v1;
2078 ThrID id2 = *(const ThrID*)v2;
2079 if (id1 < id2) return -1;
2080 if (id1 > id2) return 1;
2081 return 0;
2082 }
2083
/* Create both (initially empty) very-dead-thread tables and install
   the ThrID ordering so they can be sorted and binary-searched.
   Must be called exactly once. */
static void verydead_thread_tables_init ( void )
{
   tl_assert(!verydead_thread_table);
   tl_assert(!verydead_thread_table_not_pruned);
   verydead_thread_table
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.1",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
   verydead_thread_table_not_pruned
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.2",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
}
2099
/* Sort the given dead-thread table, then assert its ThrIDs are
   strictly increasing -- which also proves they are unique. */
static void verydead_thread_table_sort_and_check (XArray* thrids)
{
   VG_(sortXA)( thrids );
   UWord n = VG_(sizeXA)( thrids );
   for (UWord k = 1; k < n; k++) {
      ThrID prev = *(ThrID*)VG_(indexXA)( thrids, k-1 );
      ThrID curr = *(ThrID*)VG_(indexXA)( thrids, k );
      tl_assert(prev < curr);
   }
   /* Ok, so the dead thread table thrids has unique and in-order keys. */
}
2118
2119 /* A VTS contains .ts, its vector clock, and also .id, a field to hold
2120 a backlink for the caller's convenience. Since we have no idea
2121 what to set that to in the library, it always gets set to
2122 VtsID_INVALID. */
typedef
   struct {
      VtsID id;     /* backlink for the caller; VtsID_INVALID by default */
      UInt  usedTS; /* number of ts[] entries currently in use */
      UInt  sizeTS; /* capacity of ts[] */
      /* entries sorted by strictly-increasing .thrid; trailing
         variable-length data (GNU zero-length array idiom) */
      ScalarTS ts[0];
   }
   VTS;
2131
2132 /* Allocate a VTS capable of storing 'sizeTS' entries. */
2133 static VTS* VTS__new ( const HChar* who, UInt sizeTS );
2134
2135 /* Make a clone of 'vts', sizing the new array to exactly match the
2136 number of ScalarTSs present. */
2137 static VTS* VTS__clone ( const HChar* who, VTS* vts );
2138
2139 /* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
2140 array is sized exactly to hold the number of required elements.
2141 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
2142 must be in strictly increasing order. */
2143 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
2144
2145 /* Delete this VTS in its entirety. */
2146 static void VTS__delete ( VTS* vts );
2147
2148 /* Create a new singleton VTS in 'out'. Caller must have
2149 pre-allocated 'out' sufficiently big to hold the result in all
2150 possible cases. */
2151 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
2152
2153 /* Create in 'out' a VTS which is the same as 'vts' except with
2154 vts[me]++, so to speak. Caller must have pre-allocated 'out'
2155 sufficiently big to hold the result in all possible cases. */
2156 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
2157
2158 /* Create in 'out' a VTS which is the join (max) of 'a' and
2159 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
2160 the result in all possible cases. */
2161 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
2162
2163 /* Compute the partial ordering relation of the two args. Although we
2164 could be completely general and return an enumeration value (EQ,
2165 LT, GT, UN), in fact we only need LEQ, and so we may as well
2166 hardwire that fact.
2167
2168 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
2169 invald ThrID). In the latter case, the returned ThrID indicates
2170 the discovered point for which they are not. There may be more
2171 than one such point, but we only care about seeing one of them, not
2172 all of them. This rather strange convention is used because
2173 sometimes we want to know the actual index at which they first
2174 differ. */
2175 static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
2176
2177 /* Compute an arbitrary structural (total) ordering on the two args,
2178 based on their VCs, so they can be looked up in a table, tree, etc.
2179 Returns -1, 0 or 1. */
2180 static Word VTS__cmp_structural ( VTS* a, VTS* b );
2181
2182 /* Debugging only. Display the given VTS. */
2183 static void VTS__show ( const VTS* vts );
2184
2185 /* Debugging only. Return vts[index], so to speak. */
2186 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
2187
2188 /* Notify the VTS machinery that a thread has been declared
2189 comprehensively dead: that is, it has done an async exit AND it has
2190 been joined with. This should ensure that its local clocks (.viR
2191 and .viW) will never again change, and so all mentions of this
2192 thread from all VTSs in the system may be removed. */
2193 static void VTS__declare_thread_very_dead ( Thr* idx );
2194
2195 /*--------------- to do with Vector Timestamps ---------------*/
2196
is_sane_VTS(VTS * vts)2197 static Bool is_sane_VTS ( VTS* vts )
2198 {
2199 UWord i, n;
2200 ScalarTS *st1, *st2;
2201 if (!vts) return False;
2202 if (vts->usedTS > vts->sizeTS) return False;
2203 n = vts->usedTS;
2204 if (n == 1) {
2205 st1 = &vts->ts[0];
2206 if (st1->tym == 0)
2207 return False;
2208 }
2209 else
2210 if (n >= 2) {
2211 for (i = 0; i < n-1; i++) {
2212 st1 = &vts->ts[i];
2213 st2 = &vts->ts[i+1];
2214 if (st1->thrid >= st2->thrid)
2215 return False;
2216 if (st1->tym == 0 || st2->tym == 0)
2217 return False;
2218 }
2219 }
2220 return True;
2221 }
2222
2223
2224 /* Create a new, empty VTS.
2225 */
/* Create a new, empty VTS.
   Allocates room for 'sizeTS' entries plus one spare slot, into which
   a magic sentinel value is written; later operations assert on the
   sentinel to catch overruns of ts[].  Zero-allocation guarantees
   usedTS == 0 and id == VtsID_INVALID (0 is assumed to be that). */
static VTS* VTS__new ( const HChar* who, UInt sizeTS )
{
   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   tl_assert(vts->usedTS == 0);
   vts->sizeTS = sizeTS;
   /* overrun-detection sentinel in the spare slot */
   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   return vts;
}
2234
2235 /* Clone this VTS.
2236 */
/* Clone this VTS, sizing the copy exactly to the number of entries in
   use.  The caller owns (and must eventually free) the result. */
static VTS* VTS__clone ( const HChar* who, VTS* vts )
{
   tl_assert(vts);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   VTS* clone = VTS__new(who, nTS);
   clone->id = vts->id;
   /* NOTE(review): redundant -- VTS__new already set sizeTS to nTS */
   clone->sizeTS = nTS;
   clone->usedTS = nTS;
   UInt i;
   for (i = 0; i < nTS; i++) {
      clone->ts[i] = vts->ts[i];
   }
   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   return clone;
}
2253
2254
2255 /* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
2256 must be in strictly increasing order. We could obviously do this
2257 much more efficiently (in linear time) if necessary.
2258 */
/* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
   must be in strictly increasing order.  We could obviously do this
   much more efficiently (in linear time) if necessary; as it stands,
   each entry costs a lookupXA (binary search) -- twice. */
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
{
   UInt i, j;
   tl_assert(vts);
   tl_assert(thridsToDel);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   /* Figure out how many ScalarTSs will remain in the output. */
   UInt nReq = nTS;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         nReq--;
   }
   tl_assert(nReq <= nTS);
   /* Copy the ones that will remain. */
   VTS* res = VTS__new(who, nReq);
   j = 0;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         continue;
      res->ts[j++] = vts->ts[i];
   }
   tl_assert(j == nReq);
   tl_assert(j == res->sizeTS);
   res->usedTS = j;
   /* sentinel still intact => no overrun happened */
   tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
   return res;
}
2289
2290
2291 /* Delete this VTS in its entirety.
2292 */
/* Delete this VTS in its entirety, after sanity-checking its
   invariants and the overrun sentinel. */
static void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->usedTS <= vts->sizeTS);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   HG_(free)(vts);
}
2300
2301
2302 /* Create a new singleton VTS.
2303 */
/* Write the singleton VTS { thr |-> tym } into 'out', which the
   caller must have pre-allocated (empty, capacity >= 1). */
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
   tl_assert(thr);
   tl_assert(tym >= 1);
   tl_assert(out);
   tl_assert(out->usedTS == 0);
   tl_assert(out->sizeTS >= 1);
   /* usedTS is known to be 0, so the single entry goes at index 0 */
   out->ts[0].thrid = Thr__to_ThrID(thr);
   out->ts[0].tym   = tym;
   out->usedTS = 1;
}
2315
2316
2317 /* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
2318 not modified.
2319 */
/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified; the result is built in pre-allocated 'out', which
   must be empty and big enough for usedTS+1 entries.  Aborts the run
   if doing so would exceed the thread-count or timestamp limits. */
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
   UInt i, n;
   ThrID me_thrid;
   Bool found = False;   /* did 'me' already appear in vts? */

   stats__vts__tick++;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   if (vts->usedTS >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= 1 + vts->usedTS);

   tl_assert(me);
   me_thrid = Thr__to_ThrID(me);
   tl_assert(is_sane_VTS(vts));
   n = vts->usedTS;

   /* Copy all entries which precede 'me'. */
   for (i = 0; i < n; i++) {
      ScalarTS* here = &vts->ts[i];
      if (UNLIKELY(here->thrid >= me_thrid))
         break;
      UInt hi = out->usedTS++;
      out->ts[hi] = *here;
   }

   /* 'i' now indicates the next entry to copy, if any.
       There are 3 possibilities:
       (a) there is no next entry (we used them all up already):
           add (me_thrid,1) to the output, and quit
       (b) there is a next entry, and its thrid > me_thrid:
           add (me_thrid,1) to the output, then copy the remaining entries
       (c) there is a next entry, and its thrid == me_thrid:
           copy it to the output but increment its timestamp value.
           Then copy the remaining entries.  (c) is the common case.
   */
   tl_assert(i >= 0 && i <= n);
   if (i == n) { /* case (a) */
      UInt hi = out->usedTS++;
      out->ts[hi].thrid = me_thrid;
      out->ts[hi].tym   = 1;
   } else {
      /* cases (b) and (c) */
      ScalarTS* here = &vts->ts[i];
      if (me_thrid == here->thrid) { /* case (c) */
         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
            /* We're hosed.  We have to stop. */
            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
         }
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = here->thrid;
         out->ts[hi].tym   = here->tym + 1;
         i++;
         found = True;
      } else { /* case (b) */
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = me_thrid;
         out->ts[hi].tym   = 1;
      }
      /* And copy any remaining entries. */
      for (/*keepgoing*/; i < n; i++) {
         ScalarTS* here2 = &vts->ts[i];
         UInt hi = out->usedTS++;
         out->ts[hi] = *here2;
      }
   }

   /* if 'me' was absent, the output gained exactly one entry */
   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   tl_assert(out->usedTS <= out->sizeTS);
}
2393
2394
2395 /* Return a new VTS constructed as the join (max) of the 2 args.
2396 Neither arg is modified.
2397 */
/* Return a new VTS constructed as the join (pointwise max) of the 2
   args.  Neither arg is modified; the result is built in
   pre-allocated 'out', which must be empty and big enough for
   useda+usedb entries.  Implemented as a sorted-list merge on the
   (strictly increasing) thrid keys, treating absent entries as
   timestamp zero. */
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
   UInt     ia, ib, useda, usedb;
   ULong    tyma, tymb, tymMax;
   ThrID    thrid;
   UInt     ncommon = 0;   /* #thrids present in both a and b */

   stats__vts__join++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   /* overly conservative test, but doing better involves comparing
      the two VTSs, which we don't want to do at this point. */
   if (useda + usedb >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= useda + usedb);

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
         from a and b in order, where thrid is the next ThrID
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if        (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         thrid = tmpb->thrid;
         tyma  = 0;
         tymb  = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         thrid = tmpa->thrid;
         tyma  = tmpa->tym;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            thrid = tmpa->thrid;
            tyma  = tmpa->tym;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            thrid = tmpb->thrid;
            tyma  = 0;
            tymb  = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            thrid = tmpa->thrid; /* == tmpb->thrid */
            tyma  = tmpa->tym;
            tymb  = tmpb->tym;
            ia++;
            ib++;
            ncommon++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = thrid;
         out->ts[hi].tym   = tymMax;
      }

   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS <= out->sizeTS);
   /* each common thrid contributed one entry instead of two */
   tl_assert(out->usedTS == useda + usedb - ncommon);
}
2494
2495
2496 /* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
2497 they are, or the first ThrID for which they are not (no valid ThrID
2498 has the value zero). This rather strange convention is used
2499 because sometimes we want to know the actual index at which they
2500 first differ. */
/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   they are, or the first ThrID for which they are not (no valid ThrID
   has the value zero).  This rather strange convention is used
   because sometimes we want to know the actual index at which they
   first differ.  Implemented as a sorted merge walk over both VTSs,
   with absent entries treated as timestamp zero. */
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      ThrID thrid;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if        (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         tyma  = 0;
         tymb  = tmpb->tym;
         thrid = tmpb->thrid;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         tyma  = tmpa->tym;
         thrid = tmpa->thrid;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = 0;
            ia++;
         }
         else
         if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            tyma  = 0;
            tymb  = tmpb->tym;
            thrid = tmpb->thrid;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         /* valid ThrIDs start at 1024, so the result is unambiguous */
         tl_assert(thrid >= 1024);
         return thrid;
      }
   }

   return 0; /* all points are LEQ => return an invalid ThrID */
}
2586
2587
2588 /* Compute an arbitrary structural (total) ordering on the two args,
2589 based on their VCs, so they can be looked up in a table, tree, etc.
2590 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2591 performance critical so there is some effort expended to make it sa
2592 fast as possible.
2593 */
VTS__cmp_structural(VTS * a,VTS * b)2594 Word VTS__cmp_structural ( VTS* a, VTS* b )
2595 {
2596 /* We just need to generate an arbitrary total ordering based on
2597 a->ts and b->ts. Preferably do it in a way which comes across likely
2598 differences relatively quickly. */
2599 Word i;
2600 Word useda = 0, usedb = 0;
2601 ScalarTS *ctsa = NULL, *ctsb = NULL;
2602
2603 stats__vts__cmp_structural++;
2604
2605 tl_assert(a);
2606 tl_assert(b);
2607
2608 ctsa = &a->ts[0]; useda = a->usedTS;
2609 ctsb = &b->ts[0]; usedb = b->usedTS;
2610
2611 if (LIKELY(useda == usedb)) {
2612 ScalarTS *tmpa = NULL, *tmpb = NULL;
2613 stats__vts__cmp_structural_slow++;
2614 /* Same length vectors. Find the first difference, if any, as
2615 fast as possible. */
2616 for (i = 0; i < useda; i++) {
2617 tmpa = &ctsa[i];
2618 tmpb = &ctsb[i];
2619 if (LIKELY(tmpa->tym == tmpb->tym
2620 && tmpa->thrid == tmpb->thrid))
2621 continue;
2622 else
2623 break;
2624 }
2625 if (UNLIKELY(i == useda)) {
2626 /* They're identical. */
2627 return 0;
2628 } else {
2629 tl_assert(i >= 0 && i < useda);
2630 if (tmpa->tym < tmpb->tym) return -1;
2631 if (tmpa->tym > tmpb->tym) return 1;
2632 if (tmpa->thrid < tmpb->thrid) return -1;
2633 if (tmpa->thrid > tmpb->thrid) return 1;
2634 /* we just established them as non-identical, hence: */
2635 }
2636 /*NOTREACHED*/
2637 tl_assert(0);
2638 }
2639
2640 if (useda < usedb) return -1;
2641 if (useda > usedb) return 1;
2642 /*NOTREACHED*/
2643 tl_assert(0);
2644 }
2645
2646
2647 /* Debugging only. Display the given VTS.
2648 */
VTS__show(const VTS * vts)2649 static void VTS__show ( const VTS* vts )
2650 {
2651 Word i, n;
2652 tl_assert(vts);
2653
2654 VG_(printf)("[");
2655 n = vts->usedTS;
2656 for (i = 0; i < n; i++) {
2657 const ScalarTS *st = &vts->ts[i];
2658 VG_(printf)(i < n-1 ? "%d:%llu " : "%d:%llu", st->thrid, (ULong)st->tym);
2659 }
2660 VG_(printf)("]");
2661 }
2662
2663
2664 /* Debugging only. Return vts[index], so to speak.
2665 */
VTS__indexAt_SLOW(VTS * vts,Thr * idx)2666 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2667 {
2668 UWord i, n;
2669 ThrID idx_thrid = Thr__to_ThrID(idx);
2670 stats__vts__indexat_slow++;
2671 tl_assert(vts);
2672 n = vts->usedTS;
2673 for (i = 0; i < n; i++) {
2674 ScalarTS* st = &vts->ts[i];
2675 if (st->thrid == idx_thrid)
2676 return st->tym;
2677 }
2678 return 0;
2679 }
2680
2681
2682 /* See comment on prototype above.
2683 */
/* See comment on prototype above: the thread has both exited and been
   joined with, so its clocks can never change again.  Record it for
   the next pruning round and drop its VTS references. */
static void VTS__declare_thread_very_dead ( Thr* thr )
{
   if (0) VG_(printf)("VTQ:  tae %p\n", thr);

   /* both conditions are required for "very dead" status */
   tl_assert(thr->llexit_done);
   tl_assert(thr->joinedwith_done);

   ThrID nyu;
   nyu = Thr__to_ThrID(thr);
   VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );

   /* We can only get here if we're assured that we'll never again
      need to look at this thread's ::viR or ::viW.  Set them to
      VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
      mostly so that we don't wind up pruning them (as that would be
      nonsensical: the only interesting ScalarTS entry for a dead
      thread is its own index, and the pruning will remove that.). */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
}
2706
2707
2708 /////////////////////////////////////////////////////////////////
2709 /////////////////////////////////////////////////////////////////
2710 // //
2711 // SECTION END vts primitives //
2712 // //
2713 /////////////////////////////////////////////////////////////////
2714 /////////////////////////////////////////////////////////////////
2715
2716
2717
2718 /////////////////////////////////////////////////////////////////
2719 /////////////////////////////////////////////////////////////////
2720 // //
2721 // SECTION BEGIN main library //
2722 // //
2723 /////////////////////////////////////////////////////////////////
2724 /////////////////////////////////////////////////////////////////
2725
2726
2727 /////////////////////////////////////////////////////////
2728 // //
2729 // VTS set //
2730 // //
2731 /////////////////////////////////////////////////////////
2732
/* Set of all distinct VTS values in the system, deduplicated by
   structural comparison (VTS__cmp_structural). */
static WordFM* /* WordFM VTS* void */ vts_set = NULL;
2734
/* Create the (initially empty) VTS set, keyed by structural value.
   Must be called exactly once. */
static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
}
2742
2743 /* Given a VTS, look in vts_set to see if we already have a
2744 structurally identical one. If yes, return the pair (True, pointer
2745 to the existing one). If no, clone this one, add the clone to the
2746 set, and return (False, pointer to the clone). */
/* Given a VTS, look in vts_set to see if we already have a
   structurally identical one.  If yes, return the pair (True, pointer
   to the existing one).  If no, clone this one, add the clone to the
   set, and return (False, pointer to the clone).  'cand' itself is
   never stored, so the caller retains ownership of it. */
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
   UWord keyW, valW;
   stats__vts_set__focaa++;
   /* only un-interned VTSs may be presented here */
   tl_assert(cand->id == VtsID_INVALID);
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      *res = (VTS*)keyW;
      return True;
   } else {
      /* not present.  Clone, add and return address of clone. */
      stats__vts_set__focaa_a++;
      VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
      tl_assert(clone != cand);
      VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
      *res = clone;
      return False;
   }
}
2770
2771
2772 /////////////////////////////////////////////////////////
2773 // //
2774 // VTS table //
2775 // //
2776 /////////////////////////////////////////////////////////
2777
static void VtsID__invalidate_caches ( void ); /* fwds */

/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for u.freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so u.freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      /* The two union members are never live at the same time: see
         the invariants above. */
      union {
         VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
         VtsID remap;    /* used only during pruning, for used entries */
      } u;
      /* u.freelink only used when vts == NULL,
         u.remap only used when vts != NULL, during pruning. */
   }
   VtsTE;

/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After GC, the value here is
   set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;
2816
vts_tab_init(void)2817 static void vts_tab_init ( void )
2818 {
2819 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2820 HG_(free), sizeof(VtsTE) );
2821 vts_tab_freelist = VtsID_INVALID;
2822 }
2823
2824 /* Add ii to the free list, checking that it looks out-of-use. */
add_to_free_list(VtsID ii)2825 static void add_to_free_list ( VtsID ii )
2826 {
2827 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2828 tl_assert(ie->vts == NULL);
2829 tl_assert(ie->rc == 0);
2830 tl_assert(ie->u.freelink == VtsID_INVALID);
2831 ie->u.freelink = vts_tab_freelist;
2832 vts_tab_freelist = ii;
2833 }
2834
2835 /* Get an entry from the free list. This will return VtsID_INVALID if
2836 the free list is empty. */
get_from_free_list(void)2837 static VtsID get_from_free_list ( void )
2838 {
2839 VtsID ii;
2840 VtsTE* ie;
2841 if (vts_tab_freelist == VtsID_INVALID)
2842 return VtsID_INVALID;
2843 ii = vts_tab_freelist;
2844 ie = VG_(indexXA)( vts_tab, ii );
2845 tl_assert(ie->vts == NULL);
2846 tl_assert(ie->rc == 0);
2847 vts_tab_freelist = ie->u.freelink;
2848 return ii;
2849 }
2850
2851 /* Produce a new VtsID that can be used, either by getting it from
2852 the freelist, or, if that is empty, by expanding vts_tab. */
get_new_VtsID(void)2853 static VtsID get_new_VtsID ( void )
2854 {
2855 VtsID ii;
2856 VtsTE te;
2857 ii = get_from_free_list();
2858 if (ii != VtsID_INVALID)
2859 return ii;
2860 te.vts = NULL;
2861 te.rc = 0;
2862 te.u.freelink = VtsID_INVALID;
2863 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2864 return ii;
2865 }
2866
2867
2868 /* Indirect callback from lib_zsm. */
VtsID__rcinc(VtsID ii)2869 static void VtsID__rcinc ( VtsID ii )
2870 {
2871 VtsTE* ie;
2872 /* VG_(indexXA) does a range check for us */
2873 ie = VG_(indexXA)( vts_tab, ii );
2874 tl_assert(ie->vts); /* else it's not in use */
2875 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2876 tl_assert(ie->vts->id == ii);
2877 ie->rc++;
2878 }
2879
2880 /* Indirect callback from lib_zsm. */
VtsID__rcdec(VtsID ii)2881 static void VtsID__rcdec ( VtsID ii )
2882 {
2883 VtsTE* ie;
2884 /* VG_(indexXA) does a range check for us */
2885 ie = VG_(indexXA)( vts_tab, ii );
2886 tl_assert(ie->vts); /* else it's not in use */
2887 tl_assert(ie->rc > 0); /* else RC snafu */
2888 tl_assert(ie->vts->id == ii);
2889 ie->rc--;
2890 }
2891
2892
2893 /* Look up 'cand' in our collection of VTSs. If present, return the
2894 VtsID for the pre-existing version. If not present, clone it, add
2895 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2896 it, and return that. */
vts_tab__find__or__clone_and_add(VTS * cand)2897 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
2898 {
2899 VTS* in_tab = NULL;
2900 tl_assert(cand->id == VtsID_INVALID);
2901 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2902 tl_assert(in_tab);
2903 if (already_have) {
2904 /* We already have a copy of 'cand'. Use that. */
2905 VtsTE* ie;
2906 tl_assert(in_tab->id != VtsID_INVALID);
2907 ie = VG_(indexXA)( vts_tab, in_tab->id );
2908 tl_assert(ie->vts == in_tab);
2909 return in_tab->id;
2910 } else {
2911 VtsID ii = get_new_VtsID();
2912 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2913 ie->vts = in_tab;
2914 ie->rc = 0;
2915 ie->u.freelink = VtsID_INVALID;
2916 in_tab->id = ii;
2917 return ii;
2918 }
2919 }
2920
2921
show_vts_stats(const HChar * caller)2922 static void show_vts_stats ( const HChar* caller )
2923 {
2924 UWord nSet, nTab, nLive;
2925 ULong totrc;
2926 UWord n, i;
2927 nSet = VG_(sizeFM)( vts_set );
2928 nTab = VG_(sizeXA)( vts_tab );
2929 totrc = 0;
2930 nLive = 0;
2931 n = VG_(sizeXA)( vts_tab );
2932 for (i = 0; i < n; i++) {
2933 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2934 if (ie->vts) {
2935 nLive++;
2936 totrc += (ULong)ie->rc;
2937 } else {
2938 tl_assert(ie->rc == 0);
2939 }
2940 }
2941 VG_(printf)(" show_vts_stats %s\n", caller);
2942 VG_(printf)(" vts_tab size %4lu\n", nTab);
2943 VG_(printf)(" vts_tab live %4lu\n", nLive);
2944 VG_(printf)(" vts_set size %4lu\n", nSet);
2945 VG_(printf)(" total rc %4llu\n", totrc);
2946 }
2947
2948
2949 /* --- Helpers for VtsID pruning --- */
2950
2951 static
remap_VtsID(XArray * old_tab,XArray * new_tab,VtsID * ii)2952 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2953 /*MOD*/XArray* /* of VtsTE */ new_tab,
2954 VtsID* ii )
2955 {
2956 VtsTE *old_te, *new_te;
2957 VtsID old_id, new_id;
2958 /* We're relying here on VG_(indexXA)'s range checking to assert on
2959 any stupid values, in particular *ii == VtsID_INVALID. */
2960 old_id = *ii;
2961 old_te = VG_(indexXA)( old_tab, old_id );
2962 old_te->rc--;
2963 new_id = old_te->u.remap;
2964 new_te = VG_(indexXA)( new_tab, new_id );
2965 new_te->rc++;
2966 *ii = new_id;
2967 }
2968
2969 static
remap_VtsIDs_in_SVal(XArray * old_tab,XArray * new_tab,SVal * s)2970 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2971 /*MOD*/XArray* /* of VtsTE */ new_tab,
2972 SVal* s )
2973 {
2974 SVal old_sv, new_sv;
2975 old_sv = *s;
2976 if (SVal__isC(old_sv)) {
2977 VtsID rMin, wMin;
2978 rMin = SVal__unC_Rmin(old_sv);
2979 wMin = SVal__unC_Wmin(old_sv);
2980 remap_VtsID( old_tab, new_tab, &rMin );
2981 remap_VtsID( old_tab, new_tab, &wMin );
2982 new_sv = SVal__mkC( rMin, wMin );
2983 *s = new_sv;
2984 }
2985 }
2986
2987
2988 /* NOT TO BE CALLED FROM WITHIN libzsm. */
/* Garbage-collect the VTS table: free all entries whose refcount has
   fallen to zero, returning them to the freelist and removing their
   VTSs from vts_set.  Then, depending on HG_(clo_vts_pruning) and on
   whether any threads became "very dead" since the last pruning,
   optionally prune dead-thread components out of every live VTS,
   rebuilding (and compacting) both vts_tab and vts_set and rewriting
   every VtsID in the system to the new numbering. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* ---------- BEGIN VTS GC ---------- */
   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are now no longer in use and can be put back on the
      free list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 12345;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;

   if (show_stats) {
      show_vts_stats("after GC");
      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   }

   stats__vts_tab_GC++;
   if (VG_(clo_stats)) {
      tl_assert(nTab > 0);
      VG_(message)(Vg_DebugMsg,
                   "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n",
                   stats__vts_tab_GC,
                   nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   }
   /* ---------- END VTS GC ---------- */

   /* Decide whether to do VTS pruning.  We have one of three
      settings. */
   static UInt pruning_auto_ctr = 0; /* do not make non-static */

   Bool do_pruning = False;
   switch (HG_(clo_vts_pruning)) {
      case 0: /* never */
         break;
      case 1: /* auto */
         /* in auto mode, prune on every 5th GC */
         do_pruning = (++pruning_auto_ctr % 5) == 0;
         break;
      case 2: /* always */
         do_pruning = True;
         break;
      default:
         tl_assert(0);
   }

   /* The rest of this routine only handles pruning, so we can
      quit at this point if it is not to be done. */
   if (!do_pruning)
      return;
   /* No need to do pruning if no thread died since the last pruning as
      no VtsTE can be pruned. */
   if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
      return;

   /* ---------- BEGIN VTS PRUNING ---------- */
   /* Sort and check the very dead threads that died since the last pruning.
      Sorting is used for the check and so that we can quickly look
      up the dead-thread entries as we work through the VTSs. */
   verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);

   /* We will run through the old table, and create a new table and
      set, at the same time setting the u.remap entries in the old
      table to point to the new entries.  Then, visit every VtsID in
      the system, and replace all of them with new ones, using the
      u.remap entries in the old table.  Finally, we can delete the old
      table and set. */

   XArray* /* of VtsTE */ new_tab
      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
                    HG_(free), sizeof(VtsTE) );

   /* WordFM VTS* void */
   WordFM* new_set
      = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
                    HG_(free),
                    (Word(*)(UWord,UWord))VTS__cmp_structural );

   /* Visit each old VTS.  For each one:

      * make a pruned version

      * search new_set for the pruned version, yielding either
        Nothing (not present) or the new VtsID for it.

      * if not present, allocate a new VtsID for it, insert (pruned
        VTS, new VtsID) in the tree, and set
        remap_table[old VtsID] = new VtsID.

      * if present, set remap_table[old VtsID] = new VtsID, where
        new VtsID was determined by the tree lookup.  Then free up
        the clone.
   */

   UWord nBeforePruning = 0, nAfterPruning = 0;
   UWord nSTSsBefore = 0, nSTSsAfter = 0;
   VtsID new_VtsID_ctr = 0;

   for (i = 0; i < nTab; i++) {

      /* For each old VTS .. */
      VtsTE* old_te = VG_(indexXA)( vts_tab, i );
      VTS* old_vts = old_te->vts;

      /* Skip it if not in use */
      if (old_te->rc == 0) {
         tl_assert(old_vts == NULL);
         continue;
      }
      tl_assert(old_te->u.remap == VtsID_INVALID);
      tl_assert(old_vts != NULL);
      tl_assert(old_vts->id == i);
      tl_assert(old_vts->ts != NULL);

      /* It is in use.  Make a pruned version. */
      nBeforePruning++;
      nSTSsBefore += old_vts->usedTS;
      VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
                                   old_vts, verydead_thread_table_not_pruned);
      tl_assert(new_vts->sizeTS == new_vts->usedTS);
      /* check the 8-byte sentinel word past the used entries is
         still intact */
      tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
                == 0x0ddC0ffeeBadF00dULL);

      /* Get rid of the old VTS and the tree entry.  It's a bit more
         complex to incrementally delete the VTSs now than to nuke
         them all after we're done, but the upside is that we don't
         wind up temporarily storing potentially two complete copies
         of each VTS and hence spiking memory use. */
      UWord oldK = 0, oldV = 12345;
      Bool present = VG_(delFromFM)( vts_set,
                                     &oldK, &oldV, (UWord)old_vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(old_vts);
      old_te->vts = NULL;
      old_vts = NULL;

      /* NO MENTIONS of old_vts allowed beyond this point. */

      /* Ok, we have the pruned copy in new_vts.  See if a
         structurally identical version is already present in new_set.
         If so, delete the one we just made and move on; if not, add
         it. */
      VTS* identical_version = NULL;
      UWord valW = 12345;
      if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
                        (UWord)new_vts)) {
         // already have it
         tl_assert(valW == 0);
         tl_assert(identical_version != NULL);
         tl_assert(identical_version != new_vts);
         VTS__delete(new_vts);
         new_vts = identical_version;
         tl_assert(new_vts->id != VtsID_INVALID);
      } else {
         tl_assert(valW == 12345);
         tl_assert(identical_version == NULL);
         new_vts->id = new_VtsID_ctr++;
         Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
         tl_assert(!b);
         VtsTE new_te;
         new_te.vts = new_vts;
         new_te.rc = 0;
         new_te.u.freelink = VtsID_INVALID;
         Word j = VG_(addToXA)( new_tab, &new_te );
         /* new table can only be a compaction of the old one, so new
            ids never exceed the old ones */
         tl_assert(j <= i);
         tl_assert(j == new_VtsID_ctr - 1);
         // stats
         nAfterPruning++;
         nSTSsAfter += new_vts->usedTS;
      }
      old_te->u.remap = new_vts->id;

   } /* for (i = 0; i < nTab; i++) */

   /* Move very dead thread from verydead_thread_table_not_pruned to
      verydead_thread_table.  Sort and check verydead_thread_table
      to verify a thread was reported very dead only once. */
   {
      UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);

      for (i = 0; i < nBT; i++) {
         ThrID thrid =
            *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
         VG_(addToXA)( verydead_thread_table, &thrid );
      }
      verydead_thread_table_sort_and_check (verydead_thread_table);
      VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
   }

   /* At this point, we have:
      * the old VTS table, with its u.remap entries set,
        and with all .vts == NULL.
      * the old VTS tree should be empty, since it and the old VTSs
        it contained have been incrementally deleted was we worked
        through the old table.
      * the new VTS table, with all .rc == 0, all u.freelink and u.remap
        == VtsID_INVALID.
      * the new VTS tree.
   */
   tl_assert( VG_(sizeFM)(vts_set) == 0 );

   /* Now actually apply the mapping. */
   /* Visit all the VtsIDs in the entire system.  Where do we expect
      to find them?
      (a) in shadow memory -- the LineZs and LineFs
      (b) in our collection of struct _Thrs.
      (c) in our collection of struct _SOs.
      Nowhere else, AFAICS.  Not in the zsm cache, because that just
      got invalidated.

      Using the u.remap fields in vts_tab, map each old VtsID to a new
      VtsID.  For each old VtsID, dec its rc; and for each new one,
      inc it.  This sets up the new refcounts, and it also gives a
      cheap sanity check of the old ones: all old refcounts should be
      zero after this operation.
   */

   /* Do the mappings for (a) above: iterate over the Primary shadow
      mem map (WordFM Addr SecMap*). */
   UWord secmapW = 0;
   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
      UWord j;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      /* Deal with the LineZs */
      for (i = 0; i < N_SECMAP_ZLINES; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] != SVal_INVALID) {
            /* compressed line: remap its 4 dictionary entries */
            for (j = 0; j < 4; j++)
               remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
         } else {
            /* full line: remap every word */
            LineF* lineF = SVal2Ptr (lineZ->dict[1]);
            for (j = 0; j < N_LINE_ARANGE; j++)
               remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
         }
      }
   }
   VG_(doneIterFM)( map_shmem );

   /* Do the mappings for (b) above: visit our collection of struct
      _Thrs. */
   Thread* hgthread = get_admin_threads();
   tl_assert(hgthread);
   while (hgthread) {
      Thr* hbthr = hgthread->hbthr;
      tl_assert(hbthr);
      /* Threads that are listed in the prunable set have their viR
         and viW set to VtsID_INVALID, so we can't mess with them. */
      if (hbthr->llexit_done && hbthr->joinedwith_done) {
         tl_assert(hbthr->viR == VtsID_INVALID);
         tl_assert(hbthr->viW == VtsID_INVALID);
         hgthread = hgthread->admin;
         continue;
      }
      remap_VtsID( vts_tab, new_tab, &hbthr->viR );
      remap_VtsID( vts_tab, new_tab, &hbthr->viW );
      hgthread = hgthread->admin;
   }

   /* Do the mappings for (c) above: visit the struct _SOs. */
   SO* so = admin_SO;
   while (so) {
      if (so->viR != VtsID_INVALID)
         remap_VtsID( vts_tab, new_tab, &so->viR );
      if (so->viW != VtsID_INVALID)
         remap_VtsID( vts_tab, new_tab, &so->viW );
      so = so->admin_next;
   }

   /* So, we're nearly done (with this incredibly complex operation).
      Check the refcounts for the old VtsIDs all fell to zero, as
      expected.  Any failure is serious. */
   for (i = 0; i < nTab; i++) {
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      tl_assert(te->vts == NULL);
      /* This is the assert proper.  Note we're also asserting
         zeroness for old entries which are unmapped.  That's OK. */
      tl_assert(te->rc == 0);
   }

   /* Install the new table and set. */
   VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
   vts_set = new_set;
   VG_(deleteXA)( vts_tab );
   vts_tab = new_tab;

   /* The freelist of vts_tab entries is empty now, because we've
      compacted all of the live entries at the low end of the
      table. */
   vts_tab_freelist = VtsID_INVALID;

   /* Sanity check vts_set and vts_tab. */

   /* Because all the live entries got slid down to the bottom of vts_tab: */
   tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));

   /* Assert that the vts_tab and vts_set entries point at each other
      in the required way */
   UWord wordK = 0, wordV = 0;
   VG_(initIterFM)( vts_set );
   while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
      tl_assert(wordK != 0);
      tl_assert(wordV == 0);
      VTS* vts = (VTS*)wordK;
      tl_assert(vts->id != VtsID_INVALID);
      VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
      tl_assert(te->vts == vts);
   }
   VG_(doneIterFM)( vts_set );

   /* Also iterate over the table, and check each entry is
      plausible. */
   nTab = VG_(sizeXA)( vts_tab );
   for (i = 0; i < nTab; i++) {
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      tl_assert(te->vts);
      tl_assert(te->vts->id == i);
      tl_assert(te->rc > 0); /* 'cos we just GC'd */
      tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
      /* value of te->u.remap not relevant */
   }

   /* And we're done.  Bwahahaha. Ha. Ha. Ha. */
   stats__vts_pruning++;
   if (VG_(clo_stats)) {
      tl_assert(nTab > 0);
      VG_(message)(
         Vg_DebugMsg,
         "libhb: VTS PR: #%lu before %lu (avg sz %lu) "
         "after %lu (avg sz %lu)\n",
         stats__vts_pruning,
         nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
         nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
      );
   }
   /* ---------- END VTS PRUNING ---------- */
}
3383
3384
3385 /////////////////////////////////////////////////////////
3386 // //
3387 // Vts IDs //
3388 // //
3389 /////////////////////////////////////////////////////////
3390
3391 //////////////////////////
/* A temporary, max-sized VTS which is used as a temporary (the first
   argument) in VTS__singleton, VTS__tick and VTS__join operations.
   Callers reset it (usedTS := 0) before each such use. */
static VTS* temp_max_sized_VTS = NULL;

//////////////////////////
/* Query/miss counters for the cmpLEQ and join2 memo caches below. */
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses = 0;
static ULong stats__join2_queries = 0;
static ULong stats__join2_misses = 0;
3401
/* Rotate w left by n bits (0 <= n <= 32).  The complementary shift
   count is masked so that n == 0 (or 32) does not perform a shift by
   the full 32-bit width, which is undefined behaviour in C.  For the
   existing callers (n == 19 and n == 13) the result is unchanged. */
static inline UInt ROL32 ( UInt w, Int n ) {
   return (w << (n & 31)) | (w >> ((32-n) & 31));
}
/* Hash an ordered pair of VtsIDs into [0, nTab).  Distinct rotations
   make (a,b) and (b,a) hash differently. */
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt h = ROL32(vi1,19);
   h ^= ROL32(vi2,13);
   return h % nTab;
}
3410
#define N_CMPLEQ_CACHE 1023
/* Direct-mapped memo cache for VtsID__cmpLEQ_WRK results. */
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

#define N_JOIN2_CACHE 1023
/* Direct-mapped memo cache for VtsID__join2_WRK results. */
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];
3420
VtsID__invalidate_caches(void)3421 static void VtsID__invalidate_caches ( void ) {
3422 Int i;
3423 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3424 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3425 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3426 cmpLEQ_cache[i].leq = False;
3427 }
3428 for (i = 0; i < N_JOIN2_CACHE; i++) {
3429 join2_cache[i].vi1 = VtsID_INVALID;
3430 join2_cache[i].vi2 = VtsID_INVALID;
3431 join2_cache[i].res = VtsID_INVALID;
3432 }
3433 }
3434 //////////////////////////
3435
3436 //static Bool VtsID__is_valid ( VtsID vi ) {
3437 // VtsTE* ve;
3438 // if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3439 // return False;
3440 // ve = VG_(indexXA)( vts_tab, vi );
3441 // if (!ve->vts)
3442 // return False;
3443 // tl_assert(ve->vts->id == vi);
3444 // return True;
3445 //}
3446
VtsID__to_VTS(VtsID vi)3447 static VTS* VtsID__to_VTS ( VtsID vi ) {
3448 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3449 tl_assert(te->vts);
3450 return te->vts;
3451 }
3452
/* Debug-print the VTS denoted by vi. */
static void VtsID__pp ( VtsID vi ) {
   VTS__show( VtsID__to_VTS(vi) );
}
3457
3458 /* compute partial ordering relation of vi1 and vi2. */
3459 __attribute__((noinline))
VtsID__cmpLEQ_WRK(VtsID vi1,VtsID vi2)3460 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
3461 UInt hash;
3462 Bool leq;
3463 VTS *v1, *v2;
3464 //if (vi1 == vi2) return True;
3465 tl_assert(vi1 != vi2);
3466 ////++
3467 stats__cmpLEQ_queries++;
3468 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3469 if (cmpLEQ_cache[hash].vi1 == vi1
3470 && cmpLEQ_cache[hash].vi2 == vi2)
3471 return cmpLEQ_cache[hash].leq;
3472 stats__cmpLEQ_misses++;
3473 ////--
3474 v1 = VtsID__to_VTS(vi1);
3475 v2 = VtsID__to_VTS(vi2);
3476 leq = VTS__cmpLEQ( v1, v2 ) == 0;
3477 ////++
3478 cmpLEQ_cache[hash].vi1 = vi1;
3479 cmpLEQ_cache[hash].vi2 = vi2;
3480 cmpLEQ_cache[hash].leq = leq;
3481 ////--
3482 return leq;
3483 }
VtsID__cmpLEQ(VtsID vi1,VtsID vi2)3484 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3485 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
3486 }
3487
3488 /* compute binary join */
3489 __attribute__((noinline))
VtsID__join2_WRK(VtsID vi1,VtsID vi2)3490 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3491 UInt hash;
3492 VtsID res;
3493 VTS *vts1, *vts2;
3494 //if (vi1 == vi2) return vi1;
3495 tl_assert(vi1 != vi2);
3496 ////++
3497 stats__join2_queries++;
3498 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3499 if (join2_cache[hash].vi1 == vi1
3500 && join2_cache[hash].vi2 == vi2)
3501 return join2_cache[hash].res;
3502 stats__join2_misses++;
3503 ////--
3504 vts1 = VtsID__to_VTS(vi1);
3505 vts2 = VtsID__to_VTS(vi2);
3506 temp_max_sized_VTS->usedTS = 0;
3507 VTS__join(temp_max_sized_VTS, vts1,vts2);
3508 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3509 ////++
3510 join2_cache[hash].vi1 = vi1;
3511 join2_cache[hash].vi2 = vi2;
3512 join2_cache[hash].res = res;
3513 ////--
3514 return res;
3515 }
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   /* join(v,v) == v, so identical ids short-circuit. */
   if (LIKELY(vi1 == vi2))
      return vi1;
   return VtsID__join2_WRK(vi1, vi2);
}
3519
3520 /* create a singleton VTS, namely [thr:1] */
VtsID__mk_Singleton(Thr * thr,ULong tym)3521 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
3522 temp_max_sized_VTS->usedTS = 0;
3523 VTS__singleton(temp_max_sized_VTS, thr,tym);
3524 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3525 }
3526
3527 /* tick operation, creates value 1 if specified index is absent */
VtsID__tick(VtsID vi,Thr * idx)3528 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3529 VTS* vts = VtsID__to_VTS(vi);
3530 temp_max_sized_VTS->usedTS = 0;
3531 VTS__tick(temp_max_sized_VTS, idx,vts);
3532 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3533 }
3534
3535 /* index into a VTS (only for assertions) */
VtsID__indexAt(VtsID vi,Thr * idx)3536 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3537 VTS* vts = VtsID__to_VTS(vi);
3538 return VTS__indexAt_SLOW( vts, idx );
3539 }
3540
3541 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3542 any, really) element in vi1 which is pointwise greater-than the
3543 corresponding element in vi2. If no such element exists, return
3544 NULL. This needs to be fairly quick since it is called every time
3545 a race is detected. */
VtsID__findFirst_notLEQ(VtsID vi1,VtsID vi2)3546 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3547 {
3548 VTS *vts1, *vts2;
3549 Thr* diffthr;
3550 ThrID diffthrid;
3551 tl_assert(vi1 != vi2);
3552 vts1 = VtsID__to_VTS(vi1);
3553 vts2 = VtsID__to_VTS(vi2);
3554 tl_assert(vts1 != vts2);
3555 diffthrid = VTS__cmpLEQ(vts1, vts2);
3556 diffthr = Thr__from_ThrID(diffthrid);
3557 tl_assert(diffthr); /* else they are LEQ ! */
3558 return diffthr;
3559 }
3560
3561
3562 /////////////////////////////////////////////////////////
3563 // //
3564 // Filters //
3565 // //
3566 /////////////////////////////////////////////////////////
3567
3568 /* Forget everything we know -- clear the filter and let everything
3569 through. This needs to be as fast as possible, since it is called
3570 every time the running thread changes, and every time a thread's
3571 vector clocks change, which can be quite frequent. The obvious
3572 fast way to do this is simply to stuff in tags which we know are
3573 not going to match anything, since they're not aligned to the start
3574 of a line. */
static void Filter__clear ( Filter* fi, const HChar* who )
{
   /* Invalidate every line by setting its tag to 1, a value that can
      never match a real tag (real tags are line-aligned).  Same
      effect as the previous hand-unrolled version. */
   UWord i;
   if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
   for (i = 0; i < FI_NUM_LINES; i += 8) {
      UWord j;
      for (j = 0; j < 8; j++)
         fi->tags[i+j] = 1; /* impossible value -- cannot match */
   }
   tl_assert(i == FI_NUM_LINES);
}
3591
3592 /* Clearing an arbitrary range in the filter. Unfortunately
3593 we have to do this due to core-supplied new/die-mem events. */
3594
static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   /* Clear the 2 filter bits covering the single byte at 'a', but
      only if the filter currently holds a's line; on a miss there is
      nothing to do. */
   Addr  atag   = FI_GET_TAG(a);    /* tag of 'a' */
   UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
   if (LIKELY( fi->tags[lineno] == atag )) {
      FiLine* line = &fi->lines[lineno];
      UWord   loff = (a - atag) / 8;
      UShort  mask = 0x3 << (2 * (a & 7));
      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
      line->u16s[loff] &= ~mask; /* clear the two bits */
   }
}
3611
static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   /* Clear the 16 filter bits covering the 8-aligned range [a, a+8),
      but only if the filter currently holds a's line; on a miss there
      is nothing to do. */
   Addr  atag   = FI_GET_TAG(a);    /* tag of 'a' */
   UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
   if (LIKELY( fi->tags[lineno] == atag ))
      fi->lines[lineno].u16s[(a - atag) / 8] = 0;
}
3624
3625 /* Only used to verify the fast Filter__clear_range */
3626 __attribute__((unused))
Filter__clear_range_SLOW(Filter * fi,Addr a,UWord len)3627 static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
3628 {
3629 tl_assert (CHECK_ZSM);
3630
3631 /* slowly do part preceding 8-alignment */
3632 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3633 Filter__clear_1byte( fi, a );
3634 a++;
3635 len--;
3636 }
3637 /* vector loop */
3638 while (len >= 8) {
3639 Filter__clear_8bytes_aligned( fi, a );
3640 a += 8;
3641 len -= 8;
3642 }
3643 /* slowly do tail */
3644 while (UNLIKELY(len > 0)) {
3645 Filter__clear_1byte( fi, a );
3646 a++;
3647 len--;
3648 }
3649 }
3650
/* Clear the filter state for the byte range [a, a+len[.  Fast path
   used notably by VG_(unknown_SP_update): an unaligned prefix is
   cleared 2 bits per byte, then aligned 8-byte words, then whole
   filter lines, then the tail of the last line.  When CHECK_ZSM > 0
   the result is cross-checked against Filter__clear_range_SLOW run
   on a copy of the filter. */
static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
# if CHECK_ZSM > 0
  /* We check the below more complex algorithm with the simple one.
     This check is very expensive : we do first the slow way on a
     copy of the data, then do it the fast way. On RETURN, we check
     the two values are equal. */
  Filter fi_check = *fi;
  Filter__clear_range_SLOW(&fi_check, a, len);
# define RETURN goto check_and_return
# else
# define RETURN return
# endif

  Addr begtag = FI_GET_TAG(a);       /* tag of range begin */

  Addr end = a + len - 1;
  Addr endtag = FI_GET_TAG(end);     /* tag of range end. */

  UWord rlen = len;                  /* remaining length to clear */

  Addr    c = a;                     /* Current position we are clearing. */
  UWord   clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
  FiLine* cline;                     /* Current line we are clearing */
  UWord   cloff;                     /* Current offset in line we are clearing, when clearing
                                        partial lines. */

  UShort u16;

  STATIC_ASSERT (FI_LINE_SZB == 32);
  // Below assumes filter lines are 32 bytes

  if (LIKELY(fi->tags[clineno] == begtag)) {
     /* LIKELY for the heavy caller VG_(unknown_SP_update). */
     /* First filter line matches begtag.
        If c is not at the filter line begin, the below will clear
        the filter line bytes starting from c. */
     cline = &fi->lines[clineno];
     cloff = (c - begtag) / 8;

     /* First the byte(s) needed to reach 8-alignment */
     if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
        /* hiB is the nr of bytes (higher addresses) from c to reach
           8-aligment. */
        UWord hiB = 8 - (c & 7);
        /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
           mask is C000 , F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
           range 7..7 6..7 5..7 4..7 3..7 2..7 1..7 */
        UShort mask = 0xFFFF << (16 - 2*hiB);

        u16 = cline->u16s[cloff];
        if (LIKELY(rlen >= hiB)) {
           cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
           rlen -= hiB;
           c += hiB;
           cloff += 1;
        } else {
           /* Only have the bits for rlen bytes bytes. */
           mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
           cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
           RETURN; // We have cleared all what we can.
        }
     }
     /* c is now 8 aligned. Clear by 8 aligned bytes,
        till c is filter-line aligned */
     while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
        cline->u16s[cloff] = 0;
        c += 8;
        rlen -= 8;
        cloff += 1;
     }
  } else {
     /* First line is not in the filter at all: skip ahead to the
        next line boundary; nothing to clear for this line. */
     c = begtag + FI_LINE_SZB;
     if (c > end)
        RETURN; // We have cleared all what we can.
     rlen -= c - a;
  }
  // We have changed c, so re-establish clineno.
  clineno = FI_GET_LINENO(c);

  if (rlen >= FI_LINE_SZB) {
     /* Here, c is filter line-aligned. Clear all full lines that
        overlap with the range starting at c, made of a full lines */
     UWord nfull = rlen / FI_LINE_SZB;
     UWord full_len = nfull * FI_LINE_SZB;
     rlen -= full_len;
     if (nfull > FI_NUM_LINES)
        nfull = FI_NUM_LINES; // no need to check several times the same entry.

     for (UWord n = 0; n < nfull; n++) {
        if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
           cline = &fi->lines[clineno];
           cline->u16s[0] = 0;
           cline->u16s[1] = 0;
           cline->u16s[2] = 0;
           cline->u16s[3] = 0;
           STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
        }
        clineno++;
        if (UNLIKELY(clineno == FI_NUM_LINES))
           clineno = 0;
     }

     c += full_len;
     clineno = FI_GET_LINENO(c);
  }

  if (CHECK_ZSM) {
     tl_assert(VG_IS_8_ALIGNED(c));
     tl_assert(clineno == FI_GET_LINENO(c));
  }

  /* Do the last filter line, if it was not cleared as a full filter line */
  if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
     cline = &fi->lines[clineno];
     cloff = (c - endtag) / 8;
     if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);

     /* c is 8 aligned. Clear by 8 aligned bytes, till we have less than
        8 bytes. */
     while (rlen >= 8) {
        cline->u16s[cloff] = 0;
        c += 8;
        rlen -= 8;
        cloff += 1;
     }
     /* Then the remaining byte(s) */
     if (rlen > 0) {
        /* nr of bytes from c to reach end. */
        UWord loB = rlen;
        /* Compute mask representing loB bytes [c..c+loB[ :
           mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
        UShort mask = 0xFFFF >> (16 - 2*loB);

        u16 = cline->u16s[cloff];
        cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
     }
  }

# if CHECK_ZSM > 0
  check_and_return:
  tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
# endif
# undef RETURN
}
3796
3797 /* ------ Read handlers for the filter. ------ */
3798
Filter__ok_to_skip_crd64(Filter * fi,Addr a)3799 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3800 {
3801 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3802 return False;
3803 {
3804 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3805 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3806 FiLine* line = &fi->lines[lineno];
3807 UWord loff = (a - atag) / 8;
3808 UShort mask = 0xAAAA;
3809 if (LIKELY( fi->tags[lineno] == atag )) {
3810 /* hit. check line and update. */
3811 UShort u16 = line->u16s[loff];
3812 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3813 line->u16s[loff] = u16 | mask; /* set them */
3814 return ok;
3815 } else {
3816 /* miss. nuke existing line and re-use it. */
3817 UWord i;
3818 fi->tags[lineno] = atag;
3819 for (i = 0; i < FI_LINE_SZB / 8; i++)
3820 line->u16s[i] = 0;
3821 line->u16s[loff] = mask;
3822 return False;
3823 }
3824 }
3825 }
3826
Filter__ok_to_skip_crd32(Filter * fi,Addr a)3827 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3828 {
3829 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3830 return False;
3831 {
3832 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3833 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3834 FiLine* line = &fi->lines[lineno];
3835 UWord loff = (a - atag) / 8;
3836 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3837 if (LIKELY( fi->tags[lineno] == atag )) {
3838 /* hit. check line and update. */
3839 UShort u16 = line->u16s[loff];
3840 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3841 line->u16s[loff] = u16 | mask; /* set them */
3842 return ok;
3843 } else {
3844 /* miss. nuke existing line and re-use it. */
3845 UWord i;
3846 fi->tags[lineno] = atag;
3847 for (i = 0; i < FI_LINE_SZB / 8; i++)
3848 line->u16s[i] = 0;
3849 line->u16s[loff] = mask;
3850 return False;
3851 }
3852 }
3853 }
3854
Filter__ok_to_skip_crd16(Filter * fi,Addr a)3855 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3856 {
3857 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3858 return False;
3859 {
3860 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3861 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3862 FiLine* line = &fi->lines[lineno];
3863 UWord loff = (a - atag) / 8;
3864 UShort mask = 0xA << (2 * (a & 6));
3865 /* mask is A000, 0A00, 00A0 or 000A */
3866 if (LIKELY( fi->tags[lineno] == atag )) {
3867 /* hit. check line and update. */
3868 UShort u16 = line->u16s[loff];
3869 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3870 line->u16s[loff] = u16 | mask; /* set them */
3871 return ok;
3872 } else {
3873 /* miss. nuke existing line and re-use it. */
3874 UWord i;
3875 fi->tags[lineno] = atag;
3876 for (i = 0; i < FI_LINE_SZB / 8; i++)
3877 line->u16s[i] = 0;
3878 line->u16s[loff] = mask;
3879 return False;
3880 }
3881 }
3882 }
3883
Filter__ok_to_skip_crd08(Filter * fi,Addr a)3884 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3885 {
3886 {
3887 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3888 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3889 FiLine* line = &fi->lines[lineno];
3890 UWord loff = (a - atag) / 8;
3891 UShort mask = 0x2 << (2 * (a & 7));
3892 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3893 if (LIKELY( fi->tags[lineno] == atag )) {
3894 /* hit. check line and update. */
3895 UShort u16 = line->u16s[loff];
3896 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3897 line->u16s[loff] = u16 | mask; /* set them */
3898 return ok;
3899 } else {
3900 /* miss. nuke existing line and re-use it. */
3901 UWord i;
3902 fi->tags[lineno] = atag;
3903 for (i = 0; i < FI_LINE_SZB / 8; i++)
3904 line->u16s[i] = 0;
3905 line->u16s[loff] = mask;
3906 return False;
3907 }
3908 }
3909 }
3910
3911
3912 /* ------ Write handlers for the filter. ------ */
3913
Filter__ok_to_skip_cwr64(Filter * fi,Addr a)3914 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3915 {
3916 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3917 return False;
3918 {
3919 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3920 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3921 FiLine* line = &fi->lines[lineno];
3922 UWord loff = (a - atag) / 8;
3923 UShort mask = 0xFFFF;
3924 if (LIKELY( fi->tags[lineno] == atag )) {
3925 /* hit. check line and update. */
3926 UShort u16 = line->u16s[loff];
3927 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3928 line->u16s[loff] = u16 | mask; /* set them */
3929 return ok;
3930 } else {
3931 /* miss. nuke existing line and re-use it. */
3932 UWord i;
3933 fi->tags[lineno] = atag;
3934 for (i = 0; i < FI_LINE_SZB / 8; i++)
3935 line->u16s[i] = 0;
3936 line->u16s[loff] = mask;
3937 return False;
3938 }
3939 }
3940 }
3941
Filter__ok_to_skip_cwr32(Filter * fi,Addr a)3942 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3943 {
3944 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3945 return False;
3946 {
3947 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3948 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3949 FiLine* line = &fi->lines[lineno];
3950 UWord loff = (a - atag) / 8;
3951 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3952 if (LIKELY( fi->tags[lineno] == atag )) {
3953 /* hit. check line and update. */
3954 UShort u16 = line->u16s[loff];
3955 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3956 line->u16s[loff] = u16 | mask; /* set them */
3957 return ok;
3958 } else {
3959 /* miss. nuke existing line and re-use it. */
3960 UWord i;
3961 fi->tags[lineno] = atag;
3962 for (i = 0; i < FI_LINE_SZB / 8; i++)
3963 line->u16s[i] = 0;
3964 line->u16s[loff] = mask;
3965 return False;
3966 }
3967 }
3968 }
3969
Filter__ok_to_skip_cwr16(Filter * fi,Addr a)3970 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3971 {
3972 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3973 return False;
3974 {
3975 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3976 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3977 FiLine* line = &fi->lines[lineno];
3978 UWord loff = (a - atag) / 8;
3979 UShort mask = 0xF << (2 * (a & 6));
3980 /* mask is F000, 0F00, 00F0 or 000F */
3981 if (LIKELY( fi->tags[lineno] == atag )) {
3982 /* hit. check line and update. */
3983 UShort u16 = line->u16s[loff];
3984 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3985 line->u16s[loff] = u16 | mask; /* set them */
3986 return ok;
3987 } else {
3988 /* miss. nuke existing line and re-use it. */
3989 UWord i;
3990 fi->tags[lineno] = atag;
3991 for (i = 0; i < FI_LINE_SZB / 8; i++)
3992 line->u16s[i] = 0;
3993 line->u16s[loff] = mask;
3994 return False;
3995 }
3996 }
3997 }
3998
Filter__ok_to_skip_cwr08(Filter * fi,Addr a)3999 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
4000 {
4001 {
4002 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
4003 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
4004 FiLine* line = &fi->lines[lineno];
4005 UWord loff = (a - atag) / 8;
4006 UShort mask = 0x3 << (2 * (a & 7));
4007 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
4008 if (LIKELY( fi->tags[lineno] == atag )) {
4009 /* hit. check line and update. */
4010 UShort u16 = line->u16s[loff];
4011 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
4012 line->u16s[loff] = u16 | mask; /* set them */
4013 return ok;
4014 } else {
4015 /* miss. nuke existing line and re-use it. */
4016 UWord i;
4017 fi->tags[lineno] = atag;
4018 for (i = 0; i < FI_LINE_SZB / 8; i++)
4019 line->u16s[i] = 0;
4020 line->u16s[loff] = mask;
4021 return False;
4022 }
4023 }
4024 }
4025
4026
4027 /////////////////////////////////////////////////////////
4028 // //
4029 // Threads //
4030 // //
4031 /////////////////////////////////////////////////////////
4032
/* Maps ThrID values to their Thr*s (which contain ThrID values that
   should point back to the relevant slot in the array).  Lowest
   numbered slot (0) is for thrid = 1024, (1) is for 1025, etc.
   Lazily created by Thr__new; queried by Thr__from_ThrID. */
static XArray* /* of Thr* */ thrid_to_thr_map = NULL;

/* And a counter to dole out ThrID values.  For rationale/background,
   see comments on definition of ScalarTS (far) above. */
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
4041
/* Return the compact ThrID associated with 'thr'. */
static ThrID Thr__to_ThrID ( Thr* thr ) {
   return thr->thrid;
}
/* Map a ThrID back to its Thr*.  ThrIDs start at 1024 (slot 0 of
   thrid_to_thr_map), hence the offset; the Thr's back-link to the
   ThrID is asserted. */
static Thr* Thr__from_ThrID ( UInt thrid ) {
   Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
   tl_assert(thr->thrid == thrid);
   return thr;
}
4050
/* True if the cached rcec for thr is valid and can be used to build the
   current stack trace just by changing the last frame to the current IP.
   The validity flag is kept in the core's shadow-SP slot 1 for thr's
   tid (read via VG_(get_SP_s1)). */
static inline Bool cached_rcec_valid(Thr *thr)
{
   UWord cached_stackvalid = VG_(get_SP_s1) (thr->hgthread->coretid);
   return cached_stackvalid != 0;
}
/* Set the validity of the cached rcec of thr (companion of
   cached_rcec_valid above; writes shadow-SP slot 1). */
static inline void set_cached_rcec_validity(Thr *thr, Bool valid)
{
   VG_(set_SP_s1) (thr->hgthread->coretid, valid);
}
4063
/* Allocate and initialise a fresh Thr: zeroed state, invalid vector
   timestamps, a new empty Filter, an empty cached rcec, and the next
   ThrID.  Registers the Thr in thrid_to_thr_map (creating the map on
   first use) and aborts the run if ThrIDs are exhausted. */
static Thr* Thr__new ( void )
{
   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
   thr->llexit_done = False;
   thr->joinedwith_done = False;
   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   /* The per-thread Kw/stack log is only kept at history level 1. */
   if (HG_(clo_history_level) == 1)
      thr->local_Kws_n_stacks
         = VG_(newXA)( HG_(zalloc),
                       "libhb.Thr__new.3 (local_Kws_and_stacks)",
                       HG_(free), sizeof(ULong_n_EC) );
   /* Make an 'empty' cached rcec in thr. */
   thr->cached_rcec.magic = RCEC_MAGIC;
   thr->cached_rcec.rc = 0;
   thr->cached_rcec.rcX = 0;
   thr->cached_rcec.next = NULL;

   /* Add this Thr* <-> ThrID binding to the mapping, and
      cross-check */
   if (!thrid_to_thr_map) {
      thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
                                     HG_(free), sizeof(Thr*) );
   }

   if (thrid_counter >= ThrID_MAX_VALID) {
      /* We're hosed. We have to stop. */
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   }

   thr->thrid = thrid_counter++;
   /* Slot 0 of the map corresponds to thrid 1024, hence the offset. */
   Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
   tl_assert(ix + 1024 == thr->thrid);

   return thr;
}
4101
/* Record, for thr, the current scalar Kw (thr's own component of its
   viW vector timestamp) together with the current stack, in
   thr->local_Kws_n_stacks.  Only active at history level 1
   (approximate history).  The array is kept bounded by discarding
   its older half when it reaches N_KWs_N_STACKs_PER_THREAD. */
static void note_local_Kw_n_stack_for ( Thr* thr )
{
   Word nPresent;
   ULong_n_EC pair;
   tl_assert(thr);

   // We only collect this info at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* This is the scalar Kw for thr. */
   pair.ull = VtsID__indexAt( thr->viW, thr );
   pair.ec = main_get_EC( thr );
   tl_assert(pair.ec);
   tl_assert(thr->local_Kws_n_stacks);

   /* check that we're not adding duplicates */
   nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );

   /* Throw away old stacks, if necessary. We can't accumulate stuff
      indefinitely. */
   if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
      VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
      nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
      if (0)
         VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
                     thr, pair.ull, pair.ec );
   }

   /* Kw values must be recorded in non-decreasing order. */
   if (nPresent > 0) {
      ULong_n_EC* prevPair
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
      tl_assert( prevPair->ull <= pair.ull );
   }

   /* NOTE(review): the very first entry deliberately stores no EC;
      presumably its stack is never consulted -- confirm against the
      readers of local_Kws_n_stacks. */
   if (nPresent == 0)
      pair.ec = NULL;

   VG_(addToXA)( thr->local_Kws_n_stacks, &pair );

   if (0)
      VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
                  thr, pair.ull, pair.ec );
   if (0)
      VG_(pp_ExeContext)(pair.ec);
}
4148
cmp__ULong_n_EC__by_ULong(const ULong_n_EC * pair1,const ULong_n_EC * pair2)4149 static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
4150 const ULong_n_EC* pair2 )
4151 {
4152 if (pair1->ull < pair2->ull) return -1;
4153 if (pair1->ull > pair2->ull) return 1;
4154 return 0;
4155 }
4156
4157
4158 /////////////////////////////////////////////////////////
4159 // //
4160 // Shadow Values //
4161 // //
4162 /////////////////////////////////////////////////////////
4163
4164 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by
4165 // hb_zsm.h. We have to do everything else here.
4166
4167 /* SVal is 64 bit unsigned int.
4168
4169 <---------30---------> <---------30--------->
4170 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
4171 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
4172 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
4173
4174 */
4175 #define SVAL_TAGMASK (3ULL << 62)
4176
SVal__isC(SVal s)4177 static inline Bool SVal__isC ( SVal s ) {
4178 return (0ULL << 62) == (s & SVAL_TAGMASK);
4179 }
/* Build a C-tagged SVal from Rmin/Wmin VtsIDs.  Per the layout
   diagram above, valid VtsIDs are 30 bits wide, so the resulting
   tag bits 63:62 come out as 00, i.e. SVal__isC holds. */
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   //tl_assert(VtsID__is_valid(rmini));
   //tl_assert(VtsID__is_valid(wmini));
   return (((ULong)rmini) << 32) | ((ULong)wmini);
}
/* Extract the Rmin VtsID (upper half) of a C-tagged SVal; asserts
   the tag. */
static inline VtsID SVal__unC_Rmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s >> 32);
}
/* Extract the Wmin VtsID (lower half) of a C-tagged SVal; asserts
   the tag. */
static inline VtsID SVal__unC_Wmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s & 0xFFFFFFFFULL);
}
4193
/* True iff s carries the NOACCESS tag (10 in bits 63:62). */
static inline Bool SVal__isA ( SVal s ) {
   return (2ULL << 62) == (s & SVAL_TAGMASK);
}
__attribute__((unused))
/* Build an SVal with the NOACCESS tag (10) and zero payload bits. */
static inline SVal SVal__mkA ( void ) {
   return 2ULL << 62;
}
4201
/* Direct callback from lib_zsm.  Bump the refcounts of the two
   VtsIDs embedded in a C-tagged SVal; A/INVALID values hold no
   references, so nothing to do for them. */
static inline void SVal__rcinc ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcinc( SVal__unC_Rmin(s) );
      VtsID__rcinc( SVal__unC_Wmin(s) );
   }
}
4209
/* Direct callback from lib_zsm.  Drop the refcounts of the two
   VtsIDs embedded in a C-tagged SVal; mirror of SVal__rcinc. */
static inline void SVal__rcdec ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcdec( SVal__unC_Rmin(s) );
      VtsID__rcdec( SVal__unC_Wmin(s) );
   }
}
4217
/* Reinterpret an SVal as a void*, for storing SVals in
   pointer-keyed containers.
   NOTE(review): on a 32-bit host UWord is 32 bits, so this truncates
   a 64-bit SVal -- presumably callers only round-trip values for
   which that is safe; confirm at the call sites. */
static inline void *SVal2Ptr (SVal s)
{
   return (void*)(UWord)s;
}
4222
/* Inverse of SVal2Ptr: recover an SVal previously stored as a
   void*. */
static inline SVal Ptr2SVal (void* ptr)
{
   return (SVal)(UWord)ptr;
}
4227
4228
4229
4230 /////////////////////////////////////////////////////////
4231 // //
4232 // Change-event map2 //
4233 // //
4234 /////////////////////////////////////////////////////////
4235
4236 /* This is in two parts:
4237
4238 1. A hash table of RCECs. This is a set of reference-counted stack
4239 traces. When the reference count of a stack trace becomes zero,
4240 it is removed from the set and freed up. The intent is to have
4241 a set of stack traces which can be referred to from (2), but to
4242 only represent each one once. The set is indexed/searched by
4243 ordering on the stack trace vectors.
4244
4245 2. A Hash table of OldRefs. These store information about each old
4246 ref that we need to record. Hash table key is the address of the
4247 location for which the information is recorded. For LRU
4248 purposes, each OldRef in the hash table is also on a doubly
4249 linked list maintaining the order in which the OldRef were most
4250 recently accessed.
4251 Each OldRef also maintains the stamp at which it was last accessed.
4252 With these stamps, we can quickly check which of 2 OldRef is the
4253 'newest', without having to scan the full list of LRU OldRef.
4254
4255 The important part of an OldRef is, however, its acc component.
4256 This binds a TSW triple (thread, size, R/W) to an RCEC.
4257
4258 We allocate a maximum of VG_(clo_conflict_cache_size) OldRef.
4259 Then we do exact LRU discarding. For each discarded OldRef we must
4260 of course decrement the reference count on the RCEC it
4261 refers to, in order that entries from (1) eventually get
4262 discarded too.
4263 */
4264
/* Event-map lookup hit/miss counters. */
static UWord stats__evm__lookup_found = 0;
static UWord stats__evm__lookup_notfound = 0;

/* Counters distinguishing how an access matched the previously
   recorded TSW triple and RCEC, plus RCEC refcount-drop traffic and
   GC discards (names per their increment sites later in the file). */
static UWord stats__ctxt_eq_tsw_eq_rcec = 0;
static UWord stats__ctxt_eq_tsw_neq_rcec = 0;
static UWord stats__ctxt_neq_tsw_neq_rcec = 0;
static UWord stats__ctxt_rcdec_calls = 0;
static UWord stats__ctxt_rcec_gc_discards = 0;

/* Current and high-water-mark population of the RCEC hash table. */
static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max = 0;

/* RCEC hash-table queries and per-chain comparisons performed. */
static UWord stats__ctxt_tab_qs = 0;
static UWord stats__ctxt_tab_cmps = 0;
4279
4280
4281 ///////////////////////////////////////////////////////
4282 //// Part (1): A hash table of RCECs
4283 ///
4284
4285 //#define N_RCEC_TAB 98317 /* prime */
4286 #define N_RCEC_TAB 196613 /* prime */
4287
4288 //////////// BEGIN RCEC pool allocator
4289 static PoolAlloc* rcec_pool_allocator;
/* Allocate an (uninitialised) RCEC from the dedicated pool. */
static RCEC* alloc_RCEC ( void ) {
   return VG_(allocEltPA) ( rcec_pool_allocator );
}
4293
/* Return an RCEC to the pool; the magic is checked first to catch
   corruption or double frees. */
static void free_RCEC ( RCEC* rcec ) {
   tl_assert(rcec->magic == RCEC_MAGIC);
   VG_(freeEltPA)( rcec_pool_allocator, rcec );
}
4298 //////////// END RCEC pool allocator
4299
4300 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4301
4302 /* Count of allocated RCEC having ref count > 0 */
4303 static UWord RCEC_referenced = 0;
4304
4305 /* True if the frames of ec1 and ec2 are different. */
RCEC__differs_by_frames(RCEC * ec1,RCEC * ec2)4306 static Bool RCEC__differs_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4307 Word i;
4308 if (CHECK_CEM) {
4309 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4310 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
4311 }
4312 if (ec1->frames_hash != ec2->frames_hash) return True;
4313 for (i = 0; i < N_FRAMES; i++) {
4314 if (ec1->frames[i] != ec2->frames[i]) return True;
4315 }
4316 return False;
4317 }
4318
/* Dec the ref of this RCEC.  The RCEC is not freed here even when the
   count reaches zero; RCEC_referenced tracks how many RCECs still
   have a non-zero count (zero-count ones are presumably reclaimed by
   the RCEC GC -- see stats__ctxt_rcec_gc_discards). */
static void ctxt__rcdec ( RCEC* ec )
{
   stats__ctxt_rcdec_calls++;
   if (CHECK_CEM)
      tl_assert(ec && ec->magic == RCEC_MAGIC);
   tl_assert(ec->rc > 0);
   ec->rc--;
   if (ec->rc == 0)
      RCEC_referenced--;
}
4330
/* Inc the ref of this RCEC; maintains the RCEC_referenced census of
   RCECs with a non-zero refcount. */
static void ctxt__rcinc ( RCEC* ec )
{
   if (CHECK_CEM)
      tl_assert(ec && ec->magic == RCEC_MAGIC);
   if (ec->rc == 0)
      RCEC_referenced++;
   ec->rc++;
}
4339
4340
/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
   subsequent searches for it cheaper.  'ec' must be present in the
   list and must not already be at its head (both asserted). */
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
{
   RCEC *ec0, *ec1, *ec2;
   if (ec == *headp)
      tl_assert(0); /* already at head of list */
   tl_assert(ec != NULL);
   /* Walk the chain; on loop exit ec0 == ec, ec1 is its predecessor
      and ec2 is ec1's predecessor (NULL when near the head). */
   ec0 = *headp;
   ec1 = NULL;
   ec2 = NULL;
   while (True) {
      if (ec0 == NULL || ec0 == ec) break;
      ec2 = ec1;
      ec1 = ec0;
      ec0 = ec0->next;
   }
   tl_assert(ec0 == ec);
   if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
      RCEC* tmp;
      /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
         predecessor. Swap ec0 and ec1, that is, move ec0 one step
         closer to the start of the list. */
      tl_assert(ec2->next == ec1);
      tl_assert(ec1->next == ec0);
      tmp = ec0->next;
      ec2->next = ec0;
      ec0->next = ec1;
      ec1->next = tmp;
   }
   else
   if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
      /* it's second in the list: swap it with the head. */
      tl_assert(*headp == ec1);
      tl_assert(ec1->next == ec0);
      ec1->next = ec0->next;
      ec0->next = ec1;
      *headp = ec0;
   }
}
4382
4383
/* Find the given RCEC in the tree, and return a pointer to it. Or,
   if not present, add the given one to the tree (by making a copy of
   it, so the caller can immediately deallocate the original) and
   return a pointer to the copy. The caller can safely have 'example'
   on its stack, since we will always return a pointer to a copy of
   it, not to the original. Note that the inserted node will have .rc
   of zero and so the caller must immediately increment it. */
__attribute__((noinline))
static RCEC* ctxt__find_or_add ( RCEC* example )
{
   UWord hent;
   RCEC* copy;

   if (CHECK_CEM) {
      /* Note that the single caller of ctxt__find_or_add always provides
         &thr->cached_rcec as argument. The sanity of thr->cached_rcec is
         always checked when a thread terminates. */
      tl_assert(example && example->magic == RCEC_MAGIC);
      tl_assert(example->rc == 0);
   }

   /* Search the hash table to see if we already have it. */
   stats__ctxt_tab_qs++;
   hent = example->frames_hash % N_RCEC_TAB;
   copy = contextTab[hent];
   while (1) {
      if (!copy) break;
      if (CHECK_CEM)
         tl_assert(copy->magic == RCEC_MAGIC);
      stats__ctxt_tab_cmps++;
      if (!RCEC__differs_by_frames(copy, example)) break;
      copy = copy->next;
   }

   if (copy) {
      tl_assert(copy != example);
      /* optimisation: if it's not at the head of its list, move 1
         step fwds, to make future searches cheaper */
      if (copy != contextTab[hent]) {
         move_RCEC_one_step_forward( &contextTab[hent], copy );
      }
   } else {
      /* Not found: insert a fresh copy at the head of its chain. */
      copy = alloc_RCEC();
      tl_assert(copy != example);
      *copy = *example;
      copy->next = contextTab[hent];
      contextTab[hent] = copy;
      stats__ctxt_tab_curr++;
      if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
         stats__ctxt_tab_max = stats__ctxt_tab_curr;
   }
   return copy;
}
4437
/* Rotate the UWord 'w' left by 'n' bits and return the result.
   The previous form computed w >> (bpw - n), which is undefined
   behaviour for n == 0 (shift count equal to the type width,
   C11 6.5.7).  Masking the count with (bpw - 1) gives the same
   result for n in [1, bpw-1] and safely returns w unchanged for
   n == 0 (or any multiple of bpw). */
static inline UWord ROLW ( UWord w, Int n )
{
   Int bpw = 8 * sizeof(UWord);  /* bits per word: 32 or 64 */
   n &= (bpw - 1);               /* reduce count; avoids UB shifts */
   if (n == 0)
      return w;
   w = (w << n) | (w >> (bpw-n));
   return w;
}
4444
/* Outcomes of the cached-rcec fast path (identical / updated /
   rebuilt from scratch -- presumably incremented in get_RCEC below),
   and differences found by check_cached_rcec_ok, split into
   unexplained ones and those with a known benign reason. */
static UWord stats__cached_rcec_identical = 0;
static UWord stats__cached_rcec_updated = 0;
static UWord stats__cached_rcec_fresh = 0;
static UWord stats__cached_rcec_diff = 0;
static UWord stats__cached_rcec_diff_known_reason = 0;
4450
4451 /* Check if the cached rcec in thr corresponds to the current
4452 stacktrace of the thread. Returns True if ok, False otherwise.
4453 This is just used for debugging the cached rcec logic, activated
4454 using --hg-sanity-flags=xx1xxx i.e. SCE_ACCESS flag.
4455 When this flag is activated, a call to this function will happen each time
4456 a stack trace is needed for a memory access. */
__attribute__((noinline))
/* 'previous_frame0' is the PC at which thr's cached rcec was last
   (re)built; it is only used in the diagnostic report below.
   Returns False only for a difference with no known benign reason. */
static Bool check_cached_rcec_ok (Thr* thr, Addr previous_frame0)
{
   Bool ok = True;
   UInt i;
   UWord frames[N_FRAMES];
   UWord sps[N_FRAMES];
   UWord fps[N_FRAMES];
   const DiEpoch cur_ep = VG_(current_DiEpoch)();

   /* Take a fresh unwind of the thread's stack to compare against
      the cached rcec frames. */
   for (i = 0; i < N_FRAMES; i++)
      frames[i] = sps[i] = fps[i] = 0;
   VG_(get_StackTrace)( thr->hgthread->coretid, &frames[0], N_FRAMES,
                        &sps[0], &fps[0], 0);
   for (i = 0; i < N_FRAMES; i++) {
      if ( thr->cached_rcec.frames[i] != frames[i] ) {
         /* There are a bunch of "normal" reasons for which a stack
            derived from the cached rcec differs from frames. */
         const HChar *reason = NULL;

         /* Old linkers (e.g. RHEL5) gave no cfi unwind information in the PLT
            section (fix was added in binutils around June 2011).
            Without PLT unwind info, stacktrace in the PLT section are
            missing an entry. E.g. the cached stacktrace is:
            ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
            ==4463== by 0x33B7F9: __libc_thread_freeres
            (in /lib/libc-2.11.2.so)
            ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
            ==4463== by 0x2F107D: clone (clone.S:130)
            while the 'check stacktrace' is
            ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
            ==4463== by 0x33B82D: strerror_thread_freeres
            (in /lib/libc-2.11.2.so)
            ==4463== by 0x33B7F9: __libc_thread_freeres
            (in /lib/libc-2.11.2.so)
            ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
            ==4463== by 0x2F107D: clone (clone.S:130)
            No cheap/easy way to detect or fix that. */

         /* It seems that sometimes, the CFI unwind info looks wrong
            for a 'ret' instruction. E.g. here is the unwind info
            for a 'retq' on gcc20 (amd64, Debian 7)
            [0x4e3ddfe .. 0x4e3ddfe]: let cfa=oldSP+48 in RA=*(cfa+-8)
            SP=cfa+0 BP=*(cfa+-24)
            This unwind info looks doubtful, as the RA should be at oldSP.
            No easy way to detect this problem.
            This gives a difference between cached rcec and
            current stack trace: the cached rcec is correct. */

         /* When returning from main, unwind info becomes erratic.
            So, by default, only report errors for main and above,
            unless asked to show below main. */
         if (reason == NULL) {
            UInt fr_main;
            Vg_FnNameKind fr_kind;
            for (fr_main = 0; fr_main < N_FRAMES; fr_main++) {
               fr_kind = VG_(get_fnname_kind_from_IP)
                  (cur_ep, frames[fr_main]);
               if (fr_kind == Vg_FnNameMain || fr_kind == Vg_FnNameBelowMain)
                  break;
            }
            UInt kh_main;
            Vg_FnNameKind kh_kind;
            for (kh_main = 0; kh_main < N_FRAMES; kh_main++) {
               kh_kind = VG_(get_fnname_kind_from_IP)
                  (cur_ep, thr->cached_rcec.frames[kh_main]);
               if (kh_kind == Vg_FnNameMain || kh_kind == Vg_FnNameBelowMain)
                  break;
            }
            if (kh_main == fr_main
                && kh_kind == fr_kind
                && (kh_main < i || (kh_main == i
                                    && kh_kind == Vg_FnNameBelowMain))) {
               // found main or below main before the difference
               reason = "Below main";
            }
         }

         /* We have places where the stack is missing some internal
            pthread functions. For such stacktraces, GDB reports only
            one function, telling:
            #0 0xf7fa81fe in _L_unlock_669 ()
            from /lib/i386-linux-gnu/libpthread.so.0
            Backtrace stopped: previous frame identical to
            this frame (corrupt stack?)

            This is when sps and fps are identical.
            The cached stack trace is then
            ==3336== at 0x40641FE: _L_unlock_669
            (pthread_mutex_unlock.c:310)
            ==3336== by 0x40302BE: pthread_mutex_unlock
            (hg_intercepts.c:710)
            ==3336== by 0x80486AF: main (cond_timedwait_test.c:14)
            while the 'check stacktrace' is
            ==3336== at 0x40641FE: _L_unlock_669
            (pthread_mutex_unlock.c:310)
            ==3336== by 0x4064206: _L_unlock_669
            (pthread_mutex_unlock.c:310)
            ==3336== by 0x4064132: __pthread_mutex_unlock_usercnt
            (pthread_mutex_unlock.c:57)
            ==3336== by 0x40302BE: pthread_mutex_unlock
            (hg_intercepts.c:710)
            ==3336== by 0x80486AF: main (cond_timedwait_test.c:14) */
         if (reason == NULL) {
            if ((i > 0
                 && sps[i] == sps[i-1] && fps[i] == fps[i-1])
                || (i < N_FRAMES-1
                    && sps[i] == sps[i+1] && fps[i] == fps[i+1])) {
               reason = "previous||next frame: identical sp and fp";
            }
         }
         if (reason == NULL) {
            if ((i > 0
                 && fps[i] == fps[i-1])
                || (i < N_FRAMES-1
                    && fps[i] == fps[i+1])) {
               reason = "previous||next frame: identical fp";
            }
         }

         /* When we have a read or write 'in the middle of a push instruction',
            then the normal backtrace is not very good, while the helgrind
            stacktrace is better, as it undoes the not yet fully finished
            push instruction before getting the stacktrace. */
         if (reason == NULL && thr->hgthread->first_sp_delta != 0) {
            reason = "fixupSP probably needed for check stacktrace";
         }

         /* Unwinding becomes hectic when running the exit handlers.
            None of GDB, cached stacktrace and check stacktrace corresponds.
            So, if we find __run_exit_handlers, ignore the difference. */
         if (reason == NULL) {
            const HChar *fnname;
            for (UInt f = 0; f < N_FRAMES; f++) {
               if (VG_(get_fnname)( cur_ep, frames[f], &fnname)
                   && VG_(strcmp) ("__run_exit_handlers", fnname) == 0) {
                  reason = "exit handlers";
                  break;
               }
            }
         }

         // Show what we have found for this difference
         if (reason == NULL) {
            ok = False;
            stats__cached_rcec_diff++;
         } else {
            ok = True;
            stats__cached_rcec_diff_known_reason++;
         }
         if (!ok || VG_(clo_verbosity) > 2) {
            Bool save_show_below_main = VG_(clo_show_below_main);
            VG_(clo_show_below_main) = True;
            /* The below error msg reports an unexpected diff in 'frame %d'.
               The (maybe wrong) pc found in the cached stacktrace is
               'cached_pc %p' while an unwind gives the (maybe wrong)
               'check_pc %p'.
               After, 'previous_frame0 %p' tells where the cached stacktrace
               was taken.
               This is then followed by the full resulting cache stack trace
               and the full stack trace found doing unwind.
               Such a diff can have various origins:
               * a bug in the unwinder, when the cached stack trace was taken
               at 'previous_frame0'
               * a bug in the unwinder, when the check stack trace was taken
               (i.e. at current pc).
               * a missing 'invalidate cache stack trace' somewhere in the
               instructions between 'previous_frame0' and current_pc.
               To investigate the last case, typically, disass the range of
               instructions where an invalidate cached stack might miss. */
            VG_(printf)("%s diff tid %d frame %d "
                        "cached_pc %p check_pc %p\n",
                        reason ? reason : "unexpected",
                        thr->hgthread->coretid,
                        i,
                        (void*)thr->cached_rcec.frames[i],
                        (void*)frames[i]);
            VG_(printf)("cached stack trace previous_frame0 %p\n",
                        (void*)previous_frame0);
            VG_(pp_StackTrace)(cur_ep, &previous_frame0, 1);
            VG_(printf)("resulting cached stack trace:\n");
            VG_(pp_StackTrace)(cur_ep, thr->cached_rcec.frames, N_FRAMES);
            VG_(printf)("check stack trace:\n");
            VG_(pp_StackTrace)(cur_ep, frames, N_FRAMES);

            VG_(show_sched_status) (False, // host_stacktrace
                                    False, // stack_usage
                                    False); // exited_threads
            if (VG_(clo_vgdb_error) == 1234567890) // HACK TO ALLOW TO DEBUG
               VG_(gdbserver) ( thr->hgthread->coretid );
            VG_(clo_show_below_main) = save_show_below_main;
         }
         break; // Stop giving more errors for this stacktrace.
      }
   }
   return ok;
}
4654
/* Return the interned RCEC (reference counted stack trace) describing
   thr's current program point.  If the per-thread cached stack trace is
   valid, derive the new trace cheaply by replacing just frame 0 with
   the current IP; otherwise do a full unwind.  In both cases the
   resulting trace lives in thr->cached_rcec and is interned via
   ctxt__find_or_add. */
__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
   UInt i;
   UWord hash;
   Addr previous_frame0 = 0; // Assignment needed to silence gcc
   RCEC *res;
   const Bool thr_cached_rcec_valid = cached_rcec_valid(thr);
   const Addr cur_ip = VG_(get_IP)(thr->hgthread->coretid);

   if (DEBUG_CACHED_RCEC)
      VG_(printf)("get rcec tid %d at IP %p SP %p"
                  " first_sp_delta %ld cached valid %d\n",
                  thr->hgthread->coretid,
                  (void*)cur_ip,
                  (void*)VG_(get_SP)(thr->hgthread->coretid),
                  thr->hgthread->first_sp_delta, thr_cached_rcec_valid);

   /* If we have a valid cached rcec, derive the new rcec from the cached one
      and update the cached one.
      Otherwise, compute a fresh rcec. */

   if (thr_cached_rcec_valid) {
      /* Update the stacktrace of the cached rcec with the current IP */
      previous_frame0 = thr->cached_rcec.frames[0];
      thr->cached_rcec.frames[0] = cur_ip;

#     if defined(VGP_x86_linux)
      // See m_stacktrace.c kludge
      extern Addr VG_(client__dl_sysinfo_int80);
      /// #include pub_core_clientstate needed for the above ????
      /// or move the above into a pub_tool_??? tool_stacktrace.h maybe ????
      if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
          && cur_ip >= VG_(client__dl_sysinfo_int80)
          && cur_ip < VG_(client__dl_sysinfo_int80)+3
          ) {
         /* IP is inside the int $0x80 syscall stub: use the word at the
            top of the client stack instead -- presumably the stub
            caller's return address (matches the m_stacktrace.c kludge;
            confirm there). */
         thr->cached_rcec.frames[0]
            = (ULong) *(Addr*)(UWord)VG_(get_SP)(thr->hgthread->coretid);
      }
#     endif

      if (previous_frame0 == thr->cached_rcec.frames[0])
         stats__cached_rcec_identical++;
      else
         stats__cached_rcec_updated++;
   } else {
      /* Compute a fresh stacktrace. */
      main_get_stacktrace( thr, &thr->cached_rcec.frames[0], N_FRAMES );
      if (DEBUG_CACHED_RCEC) {
         Bool save_show_below_main = VG_(clo_show_below_main);
         VG_(clo_show_below_main) = True;
         VG_(printf)("caching stack trace:\n");
         VG_(pp_StackTrace)(VG_(current_DiEpoch)(),
                            &thr->cached_rcec.frames[0], N_FRAMES);
         VG_(clo_show_below_main) = save_show_below_main;
      }
      stats__cached_rcec_fresh++;
   }

   /* Hash all the frames, so ctxt__find_or_add can bucket and compare
      candidate RCECs cheaply. */
   hash = 0;
   for (i = 0; i < N_FRAMES; i++) {
      hash ^= thr->cached_rcec.frames[i];
      hash = ROLW(hash, 19);
   }
   thr->cached_rcec.frames_hash = hash;
   res = ctxt__find_or_add( &thr->cached_rcec );

   if (UNLIKELY(HG_(clo_sanity_flags) & SCE_ACCESS)
       && thr_cached_rcec_valid) {
      /* In case the cached and check differ, invalidate the cached rcec.
         We have less duplicated diffs reported afterwards. */
      if (!check_cached_rcec_ok (thr, previous_frame0))
         set_cached_rcec_validity(thr, False);
   } else {
      /* A freshly computed full unwind becomes the new valid cache
         (delta stacktrace mode only). */
      if (HG_(clo_delta_stacktrace) && !thr_cached_rcec_valid)
         set_cached_rcec_validity(thr, True);
   }

   return res;
}
4735
4736 ///////////////////////////////////////////////////////
4737 //// Part (2):
4738 /// A hashtable guest-addr -> OldRef, that refers to (1)
4739 /// Note: we use the guest address as key. This means that the entries
4740 /// for multiple threads accessing the same address will land in the same
4741 /// bucket. It might be nice to have a better distribution of the
/// OldRef in the hashtable by using as key the guestaddress ^ tsw.
4743 /// The problem is that when a race is reported on a ga, we need to retrieve
4744 /// efficiently the accesses to ga by other threads, only using the ga.
4745 /// Measurements on firefox have shown that the chain length is reasonable.
4746
4747 /* Records an access: a thread, a context (size & writeness) and the
4748 number of held locks. The size (1,2,4,8) is stored as is in szB.
4749 Note that szB uses more bits than needed to store a size up to 8.
4750 This allows to use a TSW as a fully initialised UInt e.g. in
4751 cmp_oldref_tsw. If needed, a more compact representation of szB
4752 can be done (e.g. use only 4 bits, or use only 2 bits and encode the
4753 size (1,2,4,8) as 00 = 1, 01 = 2, 10 = 4, 11 = 8. */
typedef
   struct {
      UInt thrid : SCALARTS_N_THRBITS;        // accessing thread's ThrID
      UInt szB : 32 - SCALARTS_N_THRBITS - 1; // access size in bytes, stored as is
      UInt isW : 1;                           // 1 if the access was a write
   } TSW; // Thread+Size+Writeness
typedef
   struct {
      TSW tsw;              // thread / size / writeness of the access
      WordSetID locksHeldW; // W-held lockset at the time of the access
      RCEC* rcec;           // stack trace of the access (reference counted)
   }
   Thr_n_RCEC;
4767
typedef
   struct OldRef {
      struct OldRef *ht_next; // to link hash table nodes together.
      UWord ga; // hash_table key, == address for which we record an access.
      struct OldRef *prev; // to refs older than this one
      struct OldRef *next; // to refs newer than this one
      UWord stamp; // allows to order (by time of access) 2 OldRef
      Thr_n_RCEC acc; // who accessed ga, how, and with which stack trace
   }
   OldRef;
4778
/* Returns or->acc.tsw reinterpreted as an UInt.  The three TSW
   bitfields are sized to fill exactly 32 bits, so the struct reads as a
   single fully initialised UInt.
   NOTE(review): this type-puns via a pointer cast; it assumes the
   compiler packs all three bitfields into one 32-bit unit -- confirm
   for any new target ABI. */
static inline UInt oldref_tsw (const OldRef* or)
{
   return *(const UInt*)(&or->acc.tsw);
}
4784
4785 /* Compare the tsw component for 2 OldRef.
4786 Used for OldRef hashtable (which already verifies equality of the
4787 'key' part. */
cmp_oldref_tsw(const void * node1,const void * node2)4788 static Word cmp_oldref_tsw (const void* node1, const void* node2 )
4789 {
4790 const UInt tsw1 = oldref_tsw(node1);
4791 const UInt tsw2 = oldref_tsw(node2);
4792
4793 if (tsw1 < tsw2) return -1;
4794 if (tsw1 > tsw2) return 1;
4795 return 0;
4796 }
4797
4798
4799 //////////// BEGIN OldRef pool allocator
4800 static PoolAlloc* oldref_pool_allocator;
4801 // Note: We only allocate elements in this pool allocator, we never free them.
4802 // We stop allocating elements at VG_(clo_conflict_cache_size).
4803 //////////// END OldRef pool allocator
4804
4805 static OldRef mru;
4806 static OldRef lru;
// A double linked list, chaining all OldRef in a mru/lru order.
4808 // mru/lru are sentinel nodes.
4809 // Whenever an oldref is re-used, its position is changed as the most recently
4810 // used (i.e. pointed to by mru.prev).
4811 // When a new oldref is needed, it is allocated from the pool
4812 // if we have not yet reached --conflict-cache-size.
4813 // Otherwise, if all oldref have already been allocated,
4814 // the least recently used (i.e. pointed to by lru.next) is re-used.
4815 // When an OldRef is used, it is moved as the most recently used entry
4816 // (i.e. pointed to by mru.prev).
4817
4818 // Removes r from the double linked list
4819 // Note: we do not need to test for special cases such as
4820 // NULL next or prev pointers, because we have sentinel nodes
4821 // at both sides of the list. So, a node is always forward and
4822 // backward linked.
OldRef_unchain(OldRef * r)4823 static inline void OldRef_unchain(OldRef *r)
4824 {
4825 r->next->prev = r->prev;
4826 r->prev->next = r->next;
4827 }
4828
4829 // Insert new as the newest OldRef
4830 // Similarly to OldRef_unchain, no need to test for NULL
4831 // pointers, as e.g. mru.prev is always guaranteed to point
4832 // to a non NULL node (lru when the list is empty).
OldRef_newest(OldRef * new)4833 static inline void OldRef_newest(OldRef *new)
4834 {
4835 new->next = &mru;
4836 new->prev = mru.prev;
4837 mru.prev = new;
4838 new->prev->next = new;
4839 }
4840
4841
4842 static VgHashTable* oldrefHT = NULL; /* Hash table* OldRef* */
4843 static UWord oldrefHTN = 0; /* # elems in oldrefHT */
4844 /* Note: the nr of ref in the oldrefHT will always be equal to
4845 the nr of elements that were allocated from the OldRef pool allocator
4846 as we never free an OldRef : we just re-use them. */
4847
4848
4849 /* allocates a new OldRef or re-use the lru one if all allowed OldRef
4850 have already been allocated. */
alloc_or_reuse_OldRef(void)4851 static OldRef* alloc_or_reuse_OldRef ( void )
4852 {
4853 if (oldrefHTN < HG_(clo_conflict_cache_size)) {
4854 oldrefHTN++;
4855 return VG_(allocEltPA) ( oldref_pool_allocator );
4856 } else {
4857 OldRef *oldref_ht;
4858 OldRef *oldref = lru.next;
4859
4860 OldRef_unchain(oldref);
4861 oldref_ht = VG_(HT_gen_remove) (oldrefHT, oldref, cmp_oldref_tsw);
4862 tl_assert (oldref == oldref_ht);
4863 ctxt__rcdec( oldref->acc.rcec );
4864 return oldref;
4865 }
4866 }
4867
4868
min_UInt(UInt a,UInt b)4869 inline static UInt min_UInt ( UInt a, UInt b ) {
4870 return a < b ? a : b;
4871 }
4872
4873 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4874 first interval is lower, 1 if the first interval is higher, and 0
4875 if there is any overlap. Redundant paranoia with casting is there
4876 following what looked distinctly like a bug in gcc-4.1.2, in which
4877 some of the comparisons were done signedly instead of
4878 unsignedly. */
4879 /* Copied from exp-ptrcheck/sg_main.c */
cmp_nonempty_intervals(Addr a1,SizeT n1,Addr a2,SizeT n2)4880 static inline Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4881 Addr a2, SizeT n2 ) {
4882 UWord a1w = (UWord)a1;
4883 UWord n1w = (UWord)n1;
4884 UWord a2w = (UWord)a2;
4885 UWord n2w = (UWord)n2;
4886 tl_assert(n1w > 0 && n2w > 0);
4887 if (a1w + n1w <= a2w) return -1L;
4888 if (a2w + n2w <= a1w) return 1L;
4889 return 0;
4890 }
4891
4892 static UWord event_map_stamp = 0; // Used to stamp each OldRef when touched.
4893
event_map_bind(Addr a,SizeT szB,Bool isW,Thr * thr)4894 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
4895 {
4896 OldRef example;
4897 OldRef* ref;
4898 RCEC* rcec;
4899
4900 tl_assert(thr);
4901 ThrID thrid = thr->thrid;
4902 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4903
4904 WordSetID locksHeldW = thr->hgthread->locksetW;
4905
4906 rcec = get_RCEC( thr );
4907
4908 /* Look in the oldrefHT to see if we already have a record for this
4909 address/thr/sz/isW. */
4910 example.ga = a;
4911 example.acc.tsw = (TSW) {.thrid = thrid,
4912 .szB = szB,
4913 .isW = (UInt)(isW & 1)};
4914 ref = VG_(HT_gen_lookup) (oldrefHT, &example, cmp_oldref_tsw);
4915
4916 if (ref) {
4917 /* We already have a record for this address and this (thrid, R/W,
4918 size) triple. */
4919 tl_assert (ref->ga == a);
4920
4921 /* thread 'thr' has an entry. Update its RCEC, if it differs. */
4922 if (rcec == ref->acc.rcec)
4923 stats__ctxt_eq_tsw_eq_rcec++;
4924 else {
4925 stats__ctxt_eq_tsw_neq_rcec++;
4926 ctxt__rcdec( ref->acc.rcec );
4927 ctxt__rcinc(rcec);
4928 ref->acc.rcec = rcec;
4929 }
4930 tl_assert(ref->acc.tsw.thrid == thrid);
4931 /* Update the stamp, RCEC and the W-held lockset. */
4932 ref->stamp = event_map_stamp;
4933 ref->acc.locksHeldW = locksHeldW;
4934
4935 OldRef_unchain(ref);
4936 OldRef_newest(ref);
4937
4938 } else {
4939 tl_assert (szB == 4 || szB == 8 ||szB == 1 || szB == 2);
4940 // We only need to check the size the first time we insert a ref.
4941 // Check for most frequent cases first
4942 // Note: we could support a szB up to 1 << (32 - SCALARTS_N_THRBITS - 1)
4943
4944 /* We don't have a record for this address+triple. Create a new one. */
4945 stats__ctxt_neq_tsw_neq_rcec++;
4946 ref = alloc_or_reuse_OldRef();
4947 ref->ga = a;
4948 ref->acc.tsw = (TSW) {.thrid = thrid,
4949 .szB = szB,
4950 .isW = (UInt)(isW & 1)};
4951 ref->stamp = event_map_stamp;
4952 ref->acc.locksHeldW = locksHeldW;
4953 ref->acc.rcec = rcec;
4954 ctxt__rcinc(rcec);
4955
4956 VG_(HT_add_node) ( oldrefHT, ref );
4957 OldRef_newest (ref);
4958 }
4959 event_map_stamp++;
4960 }
4961
4962
/* Extract info from the conflicting-access machinery.
   Returns the most recent conflicting access with thr/[a, a+szB[/isW.
   On success fills the OUT params with the conflicting access's stack
   trace, thread, size, writeness and W-held lockset, and returns True;
   returns False if no overlapping access by another thread is
   recorded. */
Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
                              /*OUT*/Thr** resThr,
                              /*OUT*/SizeT* resSzB,
                              /*OUT*/Bool* resIsW,
                              /*OUT*/WordSetID* locksHeldW,
                              Thr* thr, Addr a, SizeT szB, Bool isW )
{
   Word i, j;
   OldRef *ref = NULL;
   SizeT ref_szB = 0;

   OldRef *cand_ref;
   SizeT cand_ref_szB;
   Addr cand_a;

   /* Candidate start addresses whose recorded access could overlap
      [a, a+szB): 'a' itself, up to 7 addresses below it (recorded
      accesses are at most 8 bytes) and up to 7 above it. */
   Addr toCheck[15];
   Int nToCheck = 0;

   tl_assert(thr);
   tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);

   ThrID thrid = thr->thrid;

   /* Check 'a' first: an exact-address match is the common case. */
   toCheck[nToCheck++] = a;
   for (i = -7; i < (Word)szB; i++) {
      if (i != 0)
         toCheck[nToCheck++] = a + i;
   }
   tl_assert(nToCheck <= 15);

   /* Now see if we can find a suitable matching event for
      any of the addresses in toCheck[0 .. nToCheck-1]. */
   for (j = 0; j < nToCheck; j++) {

      cand_a = toCheck[j];
      //      VG_(printf)("test %ld %p\n", j, cand_a);

      /* Find the first HT element for this address.
         We might have several of these. They will be linked via ht_next.
         We however need to check various elements as the list contains
         all elements that map to the same bucket. */
      for (cand_ref = VG_(HT_lookup)( oldrefHT, cand_a );
           cand_ref; cand_ref = cand_ref->ht_next) {
         if (cand_ref->ga != cand_a)
            /* OldRef for another address in this HT bucket. Ignore. */
            continue;

         if (cand_ref->acc.tsw.thrid == thrid)
            /* This is an access by the same thread, but we're only
               interested in accesses from other threads.  Ignore. */
            continue;

         if ((!cand_ref->acc.tsw.isW) && (!isW))
            /* We don't want to report a read racing against another
               read; that's stupid.  So in this case move on. */
            continue;

         cand_ref_szB        = cand_ref->acc.tsw.szB;
         if (cmp_nonempty_intervals(a, szB, cand_a, cand_ref_szB) != 0)
            /* No overlap with the access we're asking about.  Ignore. */
            continue;

         /* We have a match. Keep this match if it is newer than
            the previous match. Note that stamp are Unsigned Words, and
            for long running applications, event_map_stamp might have cycled.
            So, 'roll' each stamp using event_map_stamp to have the
            stamps in the good order, in case event_map_stamp recycled. */
         if (!ref
             || (ref->stamp - event_map_stamp)
                   < (cand_ref->stamp - event_map_stamp)) {
            ref = cand_ref;
            ref_szB = cand_ref_szB;
         }
      }

      if (ref) {
         /* return with success */
         Int n, maxNFrames;
         RCEC*        ref_rcec = ref->acc.rcec;
         tl_assert(ref->acc.tsw.thrid);
         tl_assert(ref_rcec);
         tl_assert(ref_rcec->magic == RCEC_MAGIC);
         tl_assert(ref_szB >= 1);
         /* Count how many non-zero frames we have. */
         maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
         for (n = 0; n < maxNFrames; n++) {
            if (0 == ref_rcec->frames[n]) break;
         }
         /* Build an ExeContext from only the valid (non-zero) frames. */
         *resEC      = VG_(make_ExeContext_from_StackTrace)(ref_rcec->frames,
                                                            n);
         *resThr     = Thr__from_ThrID(ref->acc.tsw.thrid);
         *resSzB     = ref_szB;
         *resIsW     = ref->acc.tsw.isW;
         *locksHeldW = ref->acc.locksHeldW;
         stats__evm__lookup_found++;
         return True;
      }

      /* consider next address in toCheck[] */
   } /* for (j = 0; j < nToCheck; j++) */

   /* really didn't find anything. */
   stats__evm__lookup_notfound++;
   return False;
}
5070
5071
libhb_event_map_access_history(Addr a,SizeT szB,Access_t fn)5072 void libhb_event_map_access_history ( Addr a, SizeT szB, Access_t fn )
5073 {
5074 OldRef *ref = lru.next;
5075 SizeT ref_szB;
5076 Int n;
5077
5078 while (ref != &mru) {
5079 ref_szB = ref->acc.tsw.szB;
5080 if (cmp_nonempty_intervals(a, szB, ref->ga, ref_szB) == 0) {
5081 RCEC* ref_rcec = ref->acc.rcec;
5082 for (n = 0; n < N_FRAMES; n++) {
5083 if (0 == ref_rcec->frames[n]) {
5084 break;
5085 }
5086 }
5087 (*fn)(ref_rcec->frames, n,
5088 Thr__from_ThrID(ref->acc.tsw.thrid),
5089 ref->ga,
5090 ref_szB,
5091 ref->acc.tsw.isW,
5092 ref->acc.locksHeldW);
5093 }
5094 tl_assert (ref->next == &mru
5095 || ((ref->stamp - event_map_stamp)
5096 < ref->next->stamp - event_map_stamp));
5097 ref = ref->next;
5098 }
5099 }
5100
event_map_init(void)5101 static void event_map_init ( void )
5102 {
5103 Word i;
5104
5105 /* Context (RCEC) pool allocator */
5106 rcec_pool_allocator = VG_(newPA) (
5107 sizeof(RCEC),
5108 1000 /* RCECs per pool */,
5109 HG_(zalloc),
5110 "libhb.event_map_init.1 (RCEC pools)",
5111 HG_(free)
5112 );
5113
5114 /* Context table */
5115 tl_assert(!contextTab);
5116 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
5117 N_RCEC_TAB * sizeof(RCEC*) );
5118 for (i = 0; i < N_RCEC_TAB; i++)
5119 contextTab[i] = NULL;
5120
5121 /* Oldref pool allocator */
5122 oldref_pool_allocator = VG_(newPA)(
5123 sizeof(OldRef),
5124 1000 /* OldRefs per pool */,
5125 HG_(zalloc),
5126 "libhb.event_map_init.3 (OldRef pools)",
5127 HG_(free)
5128 );
5129
5130 /* Oldref hashtable */
5131 tl_assert(!oldrefHT);
5132 oldrefHT = VG_(HT_construct) ("libhb.event_map_init.4 (oldref hashtable)");
5133
5134 oldrefHTN = 0;
5135 mru.prev = &lru;
5136 mru.next = NULL;
5137 lru.prev = NULL;
5138 lru.next = &mru;
5139 mru.acc = (Thr_n_RCEC) {.tsw = {.thrid = 0,
5140 .szB = 0,
5141 .isW = 0},
5142 .locksHeldW = 0,
5143 .rcec = NULL};
5144 lru.acc = mru.acc;
5145 }
5146
event_map__check_reference_counts(void)5147 static void event_map__check_reference_counts ( void )
5148 {
5149 RCEC* rcec;
5150 OldRef* oldref;
5151 Word i;
5152 UWord nEnts = 0;
5153
5154 /* Set the 'check' reference counts to zero. Also, optionally
5155 check that the real reference counts are non-zero. We allow
5156 these to fall to zero before a GC, but the GC must get rid of
5157 all those that are zero, hence none should be zero after a
5158 GC. */
5159 for (i = 0; i < N_RCEC_TAB; i++) {
5160 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
5161 nEnts++;
5162 tl_assert(rcec);
5163 tl_assert(rcec->magic == RCEC_MAGIC);
5164 rcec->rcX = 0;
5165 }
5166 }
5167
5168 /* check that the stats are sane */
5169 tl_assert(nEnts == stats__ctxt_tab_curr);
5170 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
5171
5172 /* visit all the referencing points, inc check ref counts */
5173 VG_(HT_ResetIter)( oldrefHT );
5174 oldref = VG_(HT_Next)( oldrefHT );
5175 while (oldref) {
5176 tl_assert (oldref->acc.tsw.thrid);
5177 tl_assert (oldref->acc.rcec);
5178 tl_assert (oldref->acc.rcec->magic == RCEC_MAGIC);
5179 oldref->acc.rcec->rcX++;
5180 oldref = VG_(HT_Next)( oldrefHT );
5181 }
5182
5183 /* compare check ref counts with actual */
5184 for (i = 0; i < N_RCEC_TAB; i++) {
5185 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
5186 tl_assert(rcec->rc == rcec->rcX);
5187 }
5188 }
5189 }
5190
/* Garbage-collect the RCEC context table: unlink and free every RCEC
   whose reference count has dropped to zero.  On exit, exactly
   RCEC_referenced entries remain. */
__attribute__((noinline))
static void do_RCEC_GC ( void )
{
   UInt i;

   if (VG_(clo_stats)) {
      static UInt ctr = 1;
      VG_(message)(Vg_DebugMsg,
                  "libhb: RCEC GC: #%u  %lu slots,"
                   " %lu cur ents(ref'd %lu),"
                   " %lu max ents\n",
                   ctr++,
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, RCEC_referenced,
                   stats__ctxt_tab_max );
   }
   /* There must be something to collect, else the GC was pointless. */
   tl_assert (stats__ctxt_tab_curr > RCEC_referenced);

   /* Throw away all RCECs with zero reference counts */
   for (i = 0; i < N_RCEC_TAB; i++) {
      /* In-place unlink: pp always points at the link slot (bucket head
         or predecessor's next field) of the node p under inspection. */
      RCEC** pp = &contextTab[i];
      RCEC*  p  = *pp;
      while (p) {
         if (p->rc == 0) {
            *pp = p->next;
            free_RCEC(p);
            p = *pp;
            tl_assert(stats__ctxt_tab_curr > 0);
            stats__ctxt_rcec_gc_discards++;
            stats__ctxt_tab_curr--;
         } else {
            pp = &p->next;
            p = p->next;
         }
      }
   }

   tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
}
5230
5231 /////////////////////////////////////////////////////////
5232 // //
5233 // Core MSM //
5234 // //
5235 /////////////////////////////////////////////////////////
5236
5237 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
5238 Nov 08, and again after [...],
5239 June 09. */
5240
5241 static ULong stats__msmcread = 0;
5242 static ULong stats__msmcread_change = 0;
5243 static ULong stats__msmcwrite = 0;
5244 static ULong stats__msmcwrite_change = 0;
5245
5246 /* Some notes on the H1 history mechanism:
5247
5248 Transition rules are:
5249
5250 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
5251 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
5252
5253 After any access by a thread T to a location L, L's constraint pair
5254 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
5255
5256 After a race by thread T conflicting with some previous access by
5257 some other thread U, for a location with constraint (before
5258 processing the later access) (Cr,Cw), then Cw[U] is the segment in
5259 which the previously access lies.
5260
5261 Hence in record_race_info, we pass in Cfailed and Kfailed, which
5262 are compared so as to find out which thread(s) this access
5263 conflicts with. Once that is established, we also require the
5264 pre-update Cw for the location, so we can index into it for those
5265 threads, to get the scalar clock values for the point at which the
5266 former accesses were made. (In fact we only bother to do any of
5267 this for an arbitrarily chosen one of the conflicting threads, as
5268 that's simpler, it avoids flooding the user with vast amounts of
5269 mostly useless information, and because the program is wrong if it
5270 contains any races at all -- so we don't really need to show all
5271 conflicting access pairs initially, so long as we only show none if
5272 none exist).
5273
5274 ---
5275
5276 That requires the auxiliary proof that
5277
5278 (Cr `join` Kw)[T] == Kw[T]
5279
5280 Why should that be true? Because for any thread T, Kw[T] >= the
5281 scalar clock value for T known by any other thread. In other
5282 words, because T's value for its own scalar clock is at least as up
5283 to date as the value for it known by any other thread (that is true
5284 for both the R- and W- scalar clocks). Hence no other thread will
5285 be able to feed in a value for that element (indirectly via a
5286 constraint) which will exceed Kw[T], and hence the join cannot
5287 cause that particular element to advance.
5288 */
5289
/* Report a race on [acc_addr, acc_addr+szB) made by 'acc_thr'.
   Cfailed and Kfailed are the constraint resp. thread-clock VtsIDs
   whose LEQ comparison failed (demonstrating the race); Cw is the
   location's pre-update W constraint, used at --history-level=1 to
   identify the conflicting segment. */
__attribute__((noinline))
static void record_race_info ( Thr* acc_thr,
                               Addr acc_addr, SizeT szB, Bool isWrite,
                               VtsID Cfailed,
                               VtsID Kfailed,
                               VtsID Cw )
{
   /* Call here to report a race.  We just hand it onwards to
      HG_(record_error_Race).  If that in turn discovers that the
      error is going to be collected, then, at history_level 2, that
      queries the conflicting-event map.  The alternative would be to
      query it right here.  But that causes a lot of pointless queries
      for errors which will shortly be discarded as duplicates, and
      can become a performance overhead; so we defer the query until
      we know the error is not a duplicate. */

   /* Stacks for the bounds of the (or one of the) conflicting
      segment(s).  These are only set at history_level 1. */
   ExeContext* hist1_seg_start = NULL;
   ExeContext* hist1_seg_end   = NULL;
   Thread*     hist1_conf_thr  = NULL;

   tl_assert(acc_thr);
   tl_assert(acc_thr->hgthread);
   tl_assert(acc_thr->hgthread->hbthr == acc_thr);
   tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);

   if (HG_(clo_history_level) == 1) {
      Bool found;
      Word firstIx, lastIx;
      ULong_n_EC key;

      /* At history_level 1, we must round up the relevant stack-pair
         for the conflicting segment right now.  This is because
         deferring it is complex; we can't (easily) put Kfailed and
         Cfailed into the XError and wait for later without
         getting tied up in difficulties with VtsID reference
         counting.  So just do it now. */
      Thr*  confThr;
      ULong confTym = 0;
      /* Which thread are we in conflict with?  There may be more than
         one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
         (in fact it's the one with the lowest Thr* value). */
      confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
      /* This must exist!  since if it was NULL then there's no
         conflict (semantics of return value of
         VtsID__findFirst_notLEQ), and msmc{read,write}, which has
         called us, just checked exactly this -- that there was in
         fact a race. */
      tl_assert(confThr);

      /* Get the scalar clock value that the conflicting thread
         introduced into the constraint.  A careful examination of the
         base machine rules shows that this must be the same as the
         conflicting thread's scalar clock when it created this
         constraint.  Hence we know the scalar clock of the
         conflicting thread when the conflicting access was made. */
      confTym = VtsID__indexAt( Cfailed, confThr );

      /* Using this scalar clock, index into the conflicting thread's
         collection of stack traces made each time its vector clock
         (hence its scalar clock) changed.  This gives the stack
         traces at the start and end of the conflicting segment (well,
         as per comment just above, of one of the conflicting
         segments, if there are more than one). */
      key.ull = confTym;
      key.ec  = NULL;
      /* tl_assert(confThr); -- asserted just above */
      tl_assert(confThr->local_Kws_n_stacks);
      firstIx = lastIx = 0;
      found = VG_(lookupXA_UNSAFE)(
                 confThr->local_Kws_n_stacks,
                 &key, &firstIx, &lastIx,
                 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
              );
      if (0) VG_(printf)("record_race_info %u %u %u  confThr %p "
                         "confTym %llu found %d (%ld,%ld)\n",
                         Cfailed, Kfailed, Cw,
                         confThr, confTym, found, firstIx, lastIx);
      /* We can't indefinitely collect stack traces at VTS
         transitions, since we'd eventually run out of memory.  Hence
         note_local_Kw_n_stack_for will eventually throw away old
         ones, which in turn means we might fail to find index value
         confTym in the array. */
      if (found) {
         ULong_n_EC *pair_start, *pair_end;
         pair_start
            = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
         hist1_seg_start = pair_start->ec;
         if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
            pair_end
               = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
                                            lastIx+1 );
            /* from properties of VG_(lookupXA) and the comparison fn used: */
            tl_assert(pair_start->ull < pair_end->ull);
            hist1_seg_end = pair_end->ec;
            /* Could do a bit better here.  It may be that pair_end
               doesn't have a stack, but the following entries in the
               array have the same scalar Kw and to have a stack.  So
               we should search a bit further along the array than
               lastIx+1 if hist1_seg_end is NULL. */
         } else {
            /* Segment still open: use the thread's current EC, unless
               it has already exited at the libc level. */
            if (!confThr->llexit_done)
               hist1_seg_end = main_get_EC( confThr );
         }
         // seg_start could be NULL iff this is the first stack in the thread
         //if (seg_start) VG_(pp_ExeContext)(seg_start);
         //if (seg_end)   VG_(pp_ExeContext)(seg_end);
         hist1_conf_thr = confThr->hgthread;
      }
   }

   HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
                           szB, isWrite,
                           hist1_conf_thr, hist1_seg_start, hist1_seg_end );
}
5406
is_sane_SVal_C(SVal sv)5407 static Bool is_sane_SVal_C ( SVal sv ) {
5408 Bool leq;
5409 if (!SVal__isC(sv)) return True;
5410 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
5411 return leq;
5412 }
5413
5414
/* Compute new state following a read */
/* Core MSM read transition: given the location's old SVal and the
   reading thread, return the new SVal, reporting a race (via
   record_race_info) iff the location's read constraint is not LEQ the
   thread's R clock. */
static inline SVal msmcread ( SVal svOld,
                              /* The following are only needed for
                                 creating error reports. */
                              Thr* acc_thr,
                              Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcread++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviR  = acc_thr->viR;
      VtsID tviW  = acc_thr->viW;
      VtsID rmini = SVal__unC_Rmin(svOld);
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(rmini,tviR);
      if (LIKELY(leq)) {
         /* no race */
         /* Note: RWLOCK subtlety: use tviW, not tviR */
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         goto out;
      } else {
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
                           rmini, /* Cfailed */
                           tviR,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* reading no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         /* State changed: record this access in the conflicting-access
            machinery so a later race on this address can display it. */
         event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
         stats__msmcread_change++;
      }
   }
   return svNew;
}
5478
5479
/* Compute new state following a write */
/* Core MSM write transition: given the location's old SVal and the
   writing thread, return the new SVal, reporting a race (via
   record_race_info) iff the location's write constraint is not LEQ the
   thread's W clock. */
static inline SVal msmcwrite ( SVal svOld,
                              /* The following are only needed for
                                 creating error reports. */
                              Thr* acc_thr,
                              Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcwrite++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviW  = acc_thr->viW;
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
      if (LIKELY(leq)) {
         /* no race */
         svNew = SVal__mkC( tviW, tviW );
         goto out;
      } else {
         VtsID rmini = SVal__unC_Rmin(svOld);
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         // proof: in the non-race case, we have
         //    rmini <= wmini (invar on constraints)
         //    tviW <= tviR (invar on thread clocks)
         //    wmini <= tviW (from run-time check)
         // hence from transitivity of <= we have
         //    rmini <= wmini <= tviW
         // and so join(rmini,tviW) == tviW
         // and    join(wmini,tviW) == tviW
         // qed.
         svNew = SVal__mkC( VtsID__join2(rmini, tviW),
                            VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
                           wmini, /* Cfailed */
                           tviW,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* writing no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         /* State changed: record this access in the conflicting-access
            machinery so a later race on this address can display it. */
         event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
         stats__msmcwrite_change++;
      }
   }
   return svNew;
}
5551
5552
5553 /////////////////////////////////////////////////////////
5554 // //
5555 // Apply core MSM to specific memory locations //
5556 // //
5557 /////////////////////////////////////////////////////////
5558
5559 /*------------- ZSM accesses: 8 bit sapply ------------- */
5560
/* Apply the MSM read transition to the single byte at 'a' on behalf
   of thread 'thr': fetch the byte's shadow value from the cache,
   push it through msmcread, and store the result back. */
static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
   CacheLine* line;
   UWord lineoff, treeno, treeoff;
   SVal sv;
   UShort d;
   stats__cline_cread08s++;
   line    = get_cacheline(a);
   lineoff = get_cacheline_offset(a);
   treeno  = get_treeno(a);
   treeoff = get_tree_offset(a); /* 0 .. 7 */
   d = line->descrs[treeno];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_8_0 << treeoff)) )) {
      /* No 8-bit leaf here yet; split the tree down to bytes. */
      SVal* tree = &line->svals[treeno << 3];
      line->descrs[treeno] = pulldown_to_8(tree, treeoff, d);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
   }
   sv = msmcread( line->svals[lineoff], thr, a, 1 );
   if (CHECK_ZSM)
      tl_assert(sv != SVal_INVALID);
   line->svals[lineoff] = sv;
}
5584
/* Apply the MSM write transition to the single byte at 'a' on behalf
   of thread 'thr': fetch the byte's shadow value from the cache,
   push it through msmcwrite, and store the result back. */
static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* line;
   UWord lineoff, treeno, treeoff;
   SVal sv;
   UShort d;
   stats__cline_cwrite08s++;
   line    = get_cacheline(a);
   lineoff = get_cacheline_offset(a);
   treeno  = get_treeno(a);
   treeoff = get_tree_offset(a); /* 0 .. 7 */
   d = line->descrs[treeno];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_8_0 << treeoff)) )) {
      /* No 8-bit leaf here yet; split the tree down to bytes. */
      SVal* tree = &line->svals[treeno << 3];
      line->descrs[treeno] = pulldown_to_8(tree, treeoff, d);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
   }
   sv = msmcwrite( line->svals[lineoff], thr, a, 1 );
   if (CHECK_ZSM)
      tl_assert(sv != SVal_INVALID);
   line->svals[lineoff] = sv;
}
5608
5609 /*------------- ZSM accesses: 16 bit sapply ------------- */
5610
/* Apply the MSM read transition to the 16-bit range at 'a'.  Falls
   back to two 8-bit applications if 'a' is misaligned or the range
   is currently represented at finer granularity. */
static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
   CacheLine* line;
   UWord lineoff, treeno, treeoff;
   SVal sv;
   UShort d;
   stats__cline_cread16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   line    = get_cacheline(a);
   lineoff = get_cacheline_offset(a);
   treeno  = get_treeno(a);
   treeoff = get_tree_offset(a); /* 0, 2, 4 or 6 */
   d = line->descrs[treeno];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_16_0 << treeoff)) )) {
      if (valid_value_is_below_me_16(d, treeoff))
         goto slowcase; /* finer-grained data below; descend */
      /* Valid value lives above us: split it to 16-bit leaves. */
      line->descrs[treeno]
         = pulldown_to_16(&line->svals[treeno << 3], treeoff, d);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
   }
   sv = msmcread( line->svals[lineoff], thr, a, 2 );
   if (CHECK_ZSM)
      tl_assert(sv != SVal_INVALID);
   line->svals[lineoff] = sv;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcread( thr, a + 0 );
   zsm_sapply08__msmcread( thr, a + 1 );
}
5644
/* Apply the MSM write transition to the 16-bit range at 'a'.  Falls
   back to two 8-bit applications if 'a' is misaligned or the range
   is currently represented at finer granularity. */
static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* line;
   UWord lineoff, treeno, treeoff;
   SVal sv;
   UShort d;
   stats__cline_cwrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   line    = get_cacheline(a);
   lineoff = get_cacheline_offset(a);
   treeno  = get_treeno(a);
   treeoff = get_tree_offset(a); /* 0, 2, 4 or 6 */
   d = line->descrs[treeno];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_16_0 << treeoff)) )) {
      if (valid_value_is_below_me_16(d, treeoff))
         goto slowcase; /* finer-grained data below; descend */
      /* Valid value lives above us: split it to 16-bit leaves. */
      line->descrs[treeno]
         = pulldown_to_16(&line->svals[treeno << 3], treeoff, d);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
   }
   sv = msmcwrite( line->svals[lineoff], thr, a, 2 );
   if (CHECK_ZSM)
      tl_assert(sv != SVal_INVALID);
   line->svals[lineoff] = sv;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcwrite( thr, a + 0 );
   zsm_sapply08__msmcwrite( thr, a + 1 );
}
5678
5679 /*------------- ZSM accesses: 32 bit sapply ------------- */
5680
/* Apply the MSM read transition to the 32-bit range at 'a'.  Falls
   back to two 16-bit applications if 'a' is misaligned or the range
   is currently represented at finer granularity. */
static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
   CacheLine* line;
   UWord lineoff, treeno, treeoff;
   SVal sv;
   UShort d;
   stats__cline_cread32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   line    = get_cacheline(a);
   lineoff = get_cacheline_offset(a);
   treeno  = get_treeno(a);
   treeoff = get_tree_offset(a); /* 0 or 4 */
   d = line->descrs[treeno];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_32_0 << treeoff)) )) {
      if (!valid_value_is_above_me_32(d, treeoff))
         goto slowcase; /* finer-grained data below; descend */
      /* Valid value lives above us: split it to 32-bit leaves. */
      line->descrs[treeno]
         = pulldown_to_32(&line->svals[treeno << 3], treeoff, d);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
   }
   sv = msmcread( line->svals[lineoff], thr, a, 4 );
   if (CHECK_ZSM)
      tl_assert(sv != SVal_INVALID);
   line->svals[lineoff] = sv;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcread( thr, a + 0 );
   zsm_sapply16__msmcread( thr, a + 2 );
}
5714
/* Apply the MSM write transition to the 32-bit range at 'a'.  Falls
   back to two 16-bit applications if 'a' is misaligned or the range
   is currently represented at finer granularity. */
static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* line;
   UWord lineoff, treeno, treeoff;
   SVal sv;
   UShort d;
   stats__cline_cwrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   line    = get_cacheline(a);
   lineoff = get_cacheline_offset(a);
   treeno  = get_treeno(a);
   treeoff = get_tree_offset(a); /* 0 or 4 */
   d = line->descrs[treeno];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_32_0 << treeoff)) )) {
      if (!valid_value_is_above_me_32(d, treeoff))
         goto slowcase; /* finer-grained data below; descend */
      /* Valid value lives above us: split it to 32-bit leaves. */
      line->descrs[treeno]
         = pulldown_to_32(&line->svals[treeno << 3], treeoff, d);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(line)); /* EXPENSIVE */
   }
   sv = msmcwrite( line->svals[lineoff], thr, a, 4 );
   if (CHECK_ZSM)
      tl_assert(sv != SVal_INVALID);
   line->svals[lineoff] = sv;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcwrite( thr, a + 0 );
   zsm_sapply16__msmcwrite( thr, a + 2 );
}
5748
5749 /*------------- ZSM accesses: 64 bit sapply ------------- */
5750
/* Apply the MSM read transition to the 64-bit range at 'a'.  Falls
   back to two 32-bit applications if 'a' is misaligned or the tree
   does not hold the whole range as a single leaf. */
static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   CacheLine* line;
   UWord lineoff, treeno;
   SVal sv;
   stats__cline_cread64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   line    = get_cacheline(a);
   lineoff = get_cacheline_offset(a);
   treeno  = get_treeno(a);
   /* tree offset would be 0 here, so it is not computed */
   if (UNLIKELY( 0 == (line->descrs[treeno] & TREE_DESCR_64) ))
      goto slowcase; /* not held as one 64-bit leaf */
   sv = msmcread( line->svals[lineoff], thr, a, 8 );
   if (CHECK_ZSM)
      tl_assert(sv != SVal_INVALID);
   line->svals[lineoff] = sv;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcread( thr, a + 0 );
   zsm_sapply32__msmcread( thr, a + 4 );
}
5778
/* Apply the MSM write transition to the 64-bit range at 'a'.  Falls
   back to two 32-bit applications if 'a' is misaligned or the tree
   does not hold the whole range as a single leaf. */
static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* line;
   UWord lineoff, treeno;
   SVal sv;
   stats__cline_cwrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   line    = get_cacheline(a);
   lineoff = get_cacheline_offset(a);
   treeno  = get_treeno(a);
   /* tree offset would be 0 here, so it is not computed */
   if (UNLIKELY( 0 == (line->descrs[treeno] & TREE_DESCR_64) ))
      goto slowcase; /* not held as one 64-bit leaf */
   sv = msmcwrite( line->svals[lineoff], thr, a, 8 );
   if (CHECK_ZSM)
      tl_assert(sv != SVal_INVALID);
   line->svals[lineoff] = sv;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcwrite( thr, a + 0 );
   zsm_sapply32__msmcwrite( thr, a + 4 );
}
5806
5807 /*--------------- ZSM accesses: 8 bit swrite --------------- */
5808
5809 static
zsm_swrite08(Addr a,SVal svNew)5810 void zsm_swrite08 ( Addr a, SVal svNew ) {
5811 CacheLine* cl;
5812 UWord cloff, tno, toff;
5813 UShort descr;
5814 stats__cline_swrite08s++;
5815 cl = get_cacheline(a);
5816 cloff = get_cacheline_offset(a);
5817 tno = get_treeno(a);
5818 toff = get_tree_offset(a); /* == 0 .. 7 */
5819 descr = cl->descrs[tno];
5820 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5821 SVal* tree = &cl->svals[tno << 3];
5822 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5823 if (CHECK_ZSM)
5824 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5825 }
5826 tl_assert(svNew != SVal_INVALID);
5827 cl->svals[cloff] = svNew;
5828 }
5829
5830 /*--------------- ZSM accesses: 16 bit swrite --------------- */
5831
/* Unconditionally set the shadow state of the (16-bit-aligned) pair
   of bytes at 'a' to 'svNew', fixing up the tree descriptor so it
   stays consistent.  Misaligned addresses fall back to two 8-bit
   writes. */
static
void zsm_swrite16 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      } else {
         /* We can't indiscriminately write on the w16 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   /* First byte holds the value; the second is marked invalid since
      it is covered by the 16-bit leaf. */
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_16to8splits++;
   zsm_swrite08( a + 0, svNew );
   zsm_swrite08( a + 1, svNew );
}
5869
5870 /*--------------- ZSM accesses: 32 bit swrite --------------- */
5871
/* Unconditionally set the shadow state of the (32-bit-aligned) four
   bytes at 'a' to 'svNew', fixing up the tree descriptor so it
   stays consistent.  Misaligned addresses fall back to two 16-bit
   writes. */
static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   /* First byte holds the value; the rest are marked invalid since
      they are covered by the 32-bit leaf. */
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}
5911
5912 /*--------------- ZSM accesses: 64 bit swrite --------------- */
5913
5914 static
zsm_swrite64(Addr a,SVal svNew)5915 void zsm_swrite64 ( Addr a, SVal svNew ) {
5916 CacheLine* cl;
5917 UWord cloff, tno;
5918 //UWord toff;
5919 stats__cline_swrite64s++;
5920 if (UNLIKELY(!aligned64(a))) goto slowcase;
5921 cl = get_cacheline(a);
5922 cloff = get_cacheline_offset(a);
5923 tno = get_treeno(a);
5924 //toff = get_tree_offset(a); /* == 0, unused */
5925 cl->descrs[tno] = TREE_DESCR_64;
5926 if (CHECK_ZSM)
5927 tl_assert(svNew != SVal_INVALID); /* EXPENSIVE */
5928 cl->svals[cloff + 0] = svNew;
5929 cl->svals[cloff + 1] = SVal_INVALID;
5930 cl->svals[cloff + 2] = SVal_INVALID;
5931 cl->svals[cloff + 3] = SVal_INVALID;
5932 cl->svals[cloff + 4] = SVal_INVALID;
5933 cl->svals[cloff + 5] = SVal_INVALID;
5934 cl->svals[cloff + 6] = SVal_INVALID;
5935 cl->svals[cloff + 7] = SVal_INVALID;
5936 return;
5937 slowcase: /* misaligned */
5938 stats__cline_64to32splits++;
5939 zsm_swrite32( a + 0, svNew );
5940 zsm_swrite32( a + 4, svNew );
5941 }
5942
5943 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */
5944
5945 static
zsm_sread08(Addr a)5946 SVal zsm_sread08 ( Addr a ) {
5947 CacheLine* cl;
5948 UWord cloff, tno, toff;
5949 UShort descr;
5950 stats__cline_sread08s++;
5951 cl = get_cacheline(a);
5952 cloff = get_cacheline_offset(a);
5953 tno = get_treeno(a);
5954 toff = get_tree_offset(a); /* == 0 .. 7 */
5955 descr = cl->descrs[tno];
5956 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5957 SVal* tree = &cl->svals[tno << 3];
5958 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5959 }
5960 return cl->svals[cloff];
5961 }
5962
/* Copy one byte's shadow state from 'src' to 'dst'.  The
   'uu_normalise' hint is accepted for interface compatibility but is
   currently unused (the uu_ prefix marks it as such). */
static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   stats__cline_scopy08s++;
   zsm_swrite08( dst, zsm_sread08( src ) );
}
5969
5970
5971 /* Block-copy states (needed for implementing realloc()). Note this
5972 doesn't change the filtering arrangements. The caller of
5973 zsm_scopy_range needs to attend to that. */
5974
/* Block-copy states (needed for implementing realloc()).  Note this
   doesn't change the filtering arrangements.  The caller of
   zsm_scopy_range needs to attend to that. */
static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (0 == len)
      return;

   /* The two ranges must not overlap. */
   tl_assert(src + len <= dst || dst + len <= src);

   /* Copy byte by byte for simplicity.  To keep later accesses to
      dst[0 .. len-1] fast, request normalisation of each destination
      line as we leave it, and also at both ends of the range. */
   for (i = 0; i < len; i++) {
      Bool norm = i == 0                                  /* first in range */
                  || i == len-1                           /* last in range  */
                  || 0 == get_cacheline_offset( dst+i+1 );/* last in line   */
      zsm_scopy08( src+i, dst+i, norm );
   }
}
5996
5997
5998 /* For setting address ranges to a given value. Has considerable
5999 sophistication so as to avoid generating large numbers of pointless
6000 cache loads/writebacks for large ranges. */
6001
6002 /* Do small ranges in-cache, in the obvious way. */
6003 static
zsm_sset_range_SMALL(Addr a,SizeT len,SVal svNew)6004 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
6005 {
6006 /* fast track a couple of common cases */
6007 if (len == 4 && aligned32(a)) {
6008 zsm_swrite32( a, svNew );
6009 return;
6010 }
6011 if (len == 8 && aligned64(a)) {
6012 zsm_swrite64( a, svNew );
6013 return;
6014 }
6015
6016 /* be completely general (but as efficient as possible) */
6017 if (len == 0) return;
6018
6019 if (!aligned16(a) && len >= 1) {
6020 zsm_swrite08( a, svNew );
6021 a += 1;
6022 len -= 1;
6023 tl_assert(aligned16(a));
6024 }
6025 if (len == 0) return;
6026
6027 if (!aligned32(a) && len >= 2) {
6028 zsm_swrite16( a, svNew );
6029 a += 2;
6030 len -= 2;
6031 tl_assert(aligned32(a));
6032 }
6033 if (len == 0) return;
6034
6035 if (!aligned64(a) && len >= 4) {
6036 zsm_swrite32( a, svNew );
6037 a += 4;
6038 len -= 4;
6039 tl_assert(aligned64(a));
6040 }
6041 if (len == 0) return;
6042
6043 if (len >= 8) {
6044 tl_assert(aligned64(a));
6045 while (len >= 8) {
6046 zsm_swrite64( a, svNew );
6047 a += 8;
6048 len -= 8;
6049 }
6050 tl_assert(aligned64(a));
6051 }
6052 if (len == 0) return;
6053
6054 if (len >= 4)
6055 tl_assert(aligned32(a));
6056 if (len >= 4) {
6057 zsm_swrite32( a, svNew );
6058 a += 4;
6059 len -= 4;
6060 }
6061 if (len == 0) return;
6062
6063 if (len >= 2)
6064 tl_assert(aligned16(a));
6065 if (len >= 2) {
6066 zsm_swrite16( a, svNew );
6067 a += 2;
6068 len -= 2;
6069 }
6070 if (len == 0) return;
6071
6072 if (len >= 1) {
6073 zsm_swrite08( a, svNew );
6074 //a += 1;
6075 len -= 1;
6076 }
6077 tl_assert(len == 0);
6078 }
6079
6080
6081 /* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
6082 for larger ranges, try to operate directly on the out-of-cache
6083 representation, rather than dragging lines into the cache,
6084 overwriting them, and forcing them out. This turns out to be an
6085 important performance optimisation.
6086
6087 Note that this doesn't change the filtering arrangements. The
6088 caller of zsm_sset_range needs to attend to that. */
6089
/* Set the shadow state of [a, a+len) to 'svNew'.  Small ranges are
   done in-cache via zsm_sset_range_SMALL; large ranges are split
   into a misaligned head, whole cache lines, and a misaligned tail,
   with the whole-line middle part written directly into the
   compressed (Z) representation to avoid dragging lines through the
   cache.  Does not touch the filtering arrangements. */
static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %lu )\n", a, len );

   if (0) {
      /* Optional instrumentation: count how often the start of the
         range is already in the shadow-memory cache. */
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr  tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      /* Split the range into [before_start, aligned_start),
         [aligned_start, after_start) and [after_start, a+len). */
      Addr  before_start  = a;
      Addr  aligned_start = cacheline_ROUNDUP(a);
      Addr  after_start   = cacheline_ROUNDDN(a + len);
      UWord before_len    = aligned_start - before_start;
      UWord aligned_len   = after_start - aligned_start;
      UWord after_len     = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      /* Head and tail fragments go through the in-cache path. */
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      /* Middle part: one whole cache line at a time. */
      while (1) {
         Addr tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            /* Line is in the cache: overwrite it there, 8 bytes at a
               time. */
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache.  Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            /* Make the whole line a single-dictionary-entry line
               holding svNew, and take a reference on it. */
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}
6183
6184
6185 /////////////////////////////////////////////////////////
6186 // //
6187 // Front-filtering accesses //
6188 // //
6189 /////////////////////////////////////////////////////////
6190
6191 static UWord stats__f_ac = 0;
6192 static UWord stats__f_sk = 0;
6193
6194 #if 0
6195 # define STATS__F_SHOW \
6196 do { \
6197 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
6198 VG_(printf)("filters: ac %lu sk %lu\n", \
6199 stats__f_ac, stats__f_sk); \
6200 } while (0)
6201 #else
6202 # define STATS__F_SHOW /* */
6203 #endif
6204
/* Filtered 8-bit write: consult the thread's access filter first and
   only fall through to the full MSM machinery on a filter miss. */
void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY( !Filter__ok_to_skip_cwr08(thr->filter, a) )) {
      zsm_sapply08__msmcwrite(thr, a);
   } else {
      stats__f_sk++;
   }
}
6214
/* Filtered 16-bit write: consult the thread's access filter first
   and only fall through to the full MSM machinery on a filter miss. */
void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY( !Filter__ok_to_skip_cwr16(thr->filter, a) )) {
      zsm_sapply16__msmcwrite(thr, a);
   } else {
      stats__f_sk++;
   }
}
6224
/* Filtered 32-bit write: consult the thread's access filter first
   and only fall through to the full MSM machinery on a filter miss. */
void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY( !Filter__ok_to_skip_cwr32(thr->filter, a) )) {
      zsm_sapply32__msmcwrite(thr, a);
   } else {
      stats__f_sk++;
   }
}
6234
/* Filtered 64-bit write: consult the thread's access filter first
   and only fall through to the full MSM machinery on a filter miss. */
void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY( !Filter__ok_to_skip_cwr64(thr->filter, a) )) {
      zsm_sapply64__msmcwrite(thr, a);
   } else {
      stats__f_sk++;
   }
}
6244
/* Filtered write of an arbitrary-sized range: decompose it into
   aligned 1/2/4/8-byte filtered writes. */
void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   /* Leading 1/2/4-byte writes to reach 8-byte alignment. */
   if (len >= 1 && !aligned16(a)) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1; len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (len >= 2 && !aligned32(a)) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2; len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (len >= 4 && !aligned64(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4; len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   /* Main body: aligned 8-byte chunks. */
   if (len >= 8) {
      tl_assert(aligned64(a));
      do {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8; len -= 8;
      } while (len >= 8);
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   /* Trailing 4/2/1-byte writes. */
   if (len >= 4) {
      tl_assert(aligned32(a));
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4; len -= 4;
   }
   if (len == 0) return;

   if (len >= 2) {
      tl_assert(aligned16(a));
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2; len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      len -= 1;
   }
   tl_assert(len == 0);
}
6320
/* Filtered 8-bit read: consult the thread's access filter first and
   only fall through to the full MSM machinery on a filter miss. */
void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY( !Filter__ok_to_skip_crd08(thr->filter, a) )) {
      zsm_sapply08__msmcread(thr, a);
   } else {
      stats__f_sk++;
   }
}
6330
/* Filtered 16-bit read: consult the thread's access filter first and
   only fall through to the full MSM machinery on a filter miss. */
void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY( !Filter__ok_to_skip_crd16(thr->filter, a) )) {
      zsm_sapply16__msmcread(thr, a);
   } else {
      stats__f_sk++;
   }
}
6340
/* Filtered 32-bit read: consult the thread's access filter first and
   only fall through to the full MSM machinery on a filter miss. */
void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY( !Filter__ok_to_skip_crd32(thr->filter, a) )) {
      zsm_sapply32__msmcread(thr, a);
   } else {
      stats__f_sk++;
   }
}
6350
/* Filtered 64-bit read: consult the thread's access filter first and
   only fall through to the full MSM machinery on a filter miss. */
void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY( !Filter__ok_to_skip_crd64(thr->filter, a) )) {
      zsm_sapply64__msmcread(thr, a);
   } else {
      stats__f_sk++;
   }
}
6360
/* Filtered read of an arbitrary-sized range: decompose it into
   aligned 1/2/4/8-byte filtered reads. */
void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   /* Leading 1/2/4-byte reads to reach 8-byte alignment. */
   if (len >= 1 && !aligned16(a)) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1; len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (len >= 2 && !aligned32(a)) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2; len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (len >= 4 && !aligned64(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4; len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   /* Main body: aligned 8-byte chunks. */
   if (len >= 8) {
      tl_assert(aligned64(a));
      do {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8; len -= 8;
      } while (len >= 8);
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   /* Trailing 4/2/1-byte reads. */
   if (len >= 4) {
      tl_assert(aligned32(a));
      zsm_sapply32_f__msmcread( thr, a );
      a += 4; len -= 4;
   }
   if (len == 0) return;

   if (len >= 2) {
      tl_assert(aligned16(a));
      zsm_sapply16_f__msmcread( thr, a );
      a += 2; len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      len -= 1;
   }
   tl_assert(len == 0);
}
6436
/* Notification that thread 'thr' is being resumed (scheduled back
   on).  Clears its access filter, since other threads may have run
   in between, and when clo_history_level == 1 (presumably the
   approximate-history mode -- confirm against hg_basics.h) makes
   sure the thread has at least one stack snapshot recorded. */
void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   if (HG_(clo_history_level) == 1) {
      tl_assert(thr->local_Kws_n_stacks);
      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
         note_local_Kw_n_stack_for(thr);
   }
}
6453
6454
6455 /////////////////////////////////////////////////////////
6456 // //
6457 // Synchronisation objects //
6458 // //
6459 /////////////////////////////////////////////////////////
6460
6461 /* A double linked list of all the SO's. */
6462 SO* admin_SO = NULL;
6463
SO__Alloc(void)6464 static SO* SO__Alloc ( void )
6465 {
6466 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6467 so->viR = VtsID_INVALID;
6468 so->viW = VtsID_INVALID;
6469 so->magic = SO_MAGIC;
6470 /* Add to double linked list */
6471 if (admin_SO) {
6472 tl_assert(admin_SO->admin_prev == NULL);
6473 admin_SO->admin_prev = so;
6474 so->admin_next = admin_SO;
6475 } else {
6476 so->admin_next = NULL;
6477 }
6478 so->admin_prev = NULL;
6479 admin_SO = so;
6480 /* */
6481 return so;
6482 }
6483
/* Release an SO: drop its clock references (the two clocks are
   either both valid or both invalid), unlink it from the global
   admin_SO list, and free it. */
static void SO__Dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   } else {
      tl_assert(so->viW == VtsID_INVALID);
   }
   so->magic = 0;
   /* Unlink from the admin_SO doubly-linked list. */
   if (so->admin_prev)
      so->admin_prev->admin_next = so->admin_next;
   if (so->admin_next)
      so->admin_next->admin_prev = so->admin_prev;
   if (admin_SO == so)
      admin_SO = so->admin_next;
   HG_(free)( so );
}
6506
6507
6508 /////////////////////////////////////////////////////////
6509 // //
6510 // Top Level API //
6511 // //
6512 /////////////////////////////////////////////////////////
6513
/* Debug aid: print a thread's vector-clock state, prefixed by 'str'.
   Compiled in but disabled (the 'if (1) return' short-circuit). */
static void show_thread_state ( const HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR != t->viW) {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   }
}
6529
6530
/* One-time initialisation of the library.  Installs the two
   callbacks (stack-trace capture and ExeContext lookup), checks
   size/layout invariants, initialises the VTS machinery, event map
   and shadow memory, and creates and returns the Thr for the root
   thread with a singleton vector clock. */
Thr* libhb_init (
        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr*  thr;
   VtsID vi;

   // We will have to have to store a large number of these,
   // so make sure they're the size we expect them to be.
   STATIC_ASSERT(sizeof(ScalarTS) == 8);

   /* because first 1024 unusable */
   STATIC_ASSERT(SCALARTS_N_THRBITS >= 11);
   /* so as to fit in a UInt w/ 5 bits to spare (see defn of
      Thr_n_RCEC and TSW). */
   STATIC_ASSERT(SCALARTS_N_THRBITS <= 27);

   /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
      (32-bit).  It's not correctness-critical, but there are a lot of
      them, so it's important from a space viewpoint.  Unfortunately
      we simply can't pack it into 2 words on a 32-bit target. */
   STATIC_ASSERT(   (sizeof(UWord) == 8 && sizeof(Thr_n_RCEC) == 16)
                 || (sizeof(UWord) == 4 && sizeof(Thr_n_RCEC) == 12));
   STATIC_ASSERT(sizeof(TSW) == sizeof(UInt));

   /* Word sets really are 32 bits.  Even on a 64 bit target. */
   STATIC_ASSERT(sizeof(WordSetID) == 4);
   STATIC_ASSERT(sizeof(WordSet) == sizeof(WordSetID));

   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   /* Allocated once and never deallocated.  Used as a temporary in
      VTS singleton, tick and join operations. */
   temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
   temp_max_sized_VTS->id = VtsID_INVALID;
   verydead_thread_tables_init();
   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( );

   /* Create the root thread with a singleton clock ticked to 1, and
      take references on both its clocks. */
   thr = Thr__new();
   vi = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state("  root", thr);
   return thr;
}
6592
6593
/* Create a new thread as a child of 'parent' (thread fork).  Both
   the child's and the parent's clocks are advanced, so the fork is
   an ordering event.  Returns the new child Thr*. */
Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   /* NB: rcdec before the tick, rcinc after -- the tick may yield a
      different VtsID, and the old one must drop its reference. */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
6629
/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");

      /* -- SecMap / shadow-line statistics -- */
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)"
                  " (%'10lu used)\n",
                  VG_(sizePA) (LineF_pool_allocator),
                  VG_(sizePA) (LineF_pool_allocator) * sizeof(LineF),
                  shmem__SecMap_used_linesF());
      VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
                  " #%lu scanGC \n",
                  stats__secmaps_in_map_shmem,
                  shmem__SecMap_do_GC(False /* really do GC */),
                  stats__secmaps_scanGC);
      tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
      VG_(printf)(" secmaps: %'10lu in freelist,"
                  " total (scanGCed %'lu, ssetGCed %'lu)\n",
                  SecMap_freelist_length(),
                  stats__secmaps_scanGCed,
                  stats__secmaps_ssetGCed);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      /* -- cacheline cache statistics -- */
      VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)(" cache: %'14lu flushes_invals\n",
                  stats__cache_flushes_invals );
      VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      /* -- cacheline operation counts -- */
      VG_(printf)(" cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu"
                  " 2to1 %'12lu\n",
                  stats__cline_64to32splits, stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu"
                  " 2to1 %'12lu\n",
                  stats__cline_64to32pulldown, stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
      VG_(printf)(" cline: sizeof(CacheLineZ) %ld,"
                  " covers %ld bytes of arange\n",
                  (Word)sizeof(LineZ),
                  (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      /* -- memory state machine / VTS query statistics -- */
      VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)(" libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
                  stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
      VG_(printf)(" libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
                  stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
      VG_(printf)(" libhb: VTSset: find__or__clone_and_add %'lu"
                  " (%'lu allocd)\n",
                  stats__vts_set__focaa, stats__vts_set__focaa_a );
      VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
                   stats__vts__indexat_slow );

      VG_(printf)("%s","\n");
      VG_(printf)(
         " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)(" libhb: #%lu vts_tab GC #%lu vts pruning\n",
                  stats__vts_tab_GC, stats__vts_pruning);
      VG_(printf)( " libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      {
         /* Classify every thread known to the core by its low-level
            exit / joined-with status. */
         UInt live = 0;
         UInt llexit_done = 0;
         UInt joinedwith_done = 0;
         UInt llexit_and_joinedwith_done = 0;

         Thread* hgthread = get_admin_threads();
         tl_assert(hgthread);
         while (hgthread) {
            Thr* hbthr = hgthread->hbthr;
            tl_assert(hbthr);
            if (hbthr->llexit_done && hbthr->joinedwith_done)
               llexit_and_joinedwith_done++;
            else if (hbthr->llexit_done)
               llexit_done++;
            else if (hbthr->joinedwith_done)
               joinedwith_done++;
            else
               live++;
            hgthread = hgthread->admin;
         }
         VG_(printf)(" libhb: threads live: %u exit_and_joinedwith %u"
                     " exit %u joinedwith %u\n",
                     live, llexit_and_joinedwith_done,
                     llexit_done, joinedwith_done);
         VG_(printf)(" libhb: %d verydead_threads, "
                     "%d verydead_threads_not_pruned\n",
                     (int) VG_(sizeXA)( verydead_thread_table),
                     (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
         tl_assert (VG_(sizeXA)( verydead_thread_table)
                    + VG_(sizeXA)( verydead_thread_table_not_pruned)
                    == llexit_and_joinedwith_done);
      }

      VG_(printf)("%s","\n");
      /* -- event map (OldRef / RCEC) statistics -- */
      VG_(printf)( " libhb: oldrefHTN %lu (%'d bytes)\n",
                   oldrefHTN, (int)(oldrefHTN * sizeof(OldRef)));
      tl_assert (oldrefHTN == VG_(HT_count_nodes) (oldrefHT));
      VG_(printf)( " libhb: oldref lookup found=%lu notfound=%lu\n",
                   stats__evm__lookup_found, stats__evm__lookup_notfound);
      if (VG_(clo_verbosity) > 1)
         VG_(HT_print_stats) (oldrefHT, cmp_oldref_tsw);
      VG_(printf)( " libhb: oldref bind tsw/rcec "
                   "==/==:%'lu ==/!=:%'lu !=/!=:%'lu\n",
                   stats__ctxt_eq_tsw_eq_rcec, stats__ctxt_eq_tsw_neq_rcec,
                   stats__ctxt_neq_tsw_neq_rcec);
      VG_(printf)( " libhb: ctxt__rcdec calls %'lu. rcec gc discards %'lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcec_gc_discards);
      VG_(printf)( " libhb: contextTab: %lu slots,"
                   " %lu cur ents(ref'd %lu),"
                   " %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, RCEC_referenced,
                   stats__ctxt_tab_max );
      VG_(printf) (" libhb: stats__cached_rcec "
                   "identical %'lu updated %'lu fresh %'lu\n",
                   stats__cached_rcec_identical, stats__cached_rcec_updated,
                   stats__cached_rcec_fresh);
      if (stats__cached_rcec_diff > 0)
         VG_(printf) (" libhb: stats__cached_rcec diff unk reason%'lu\n",
                      stats__cached_rcec_diff);
      if (stats__cached_rcec_diff_known_reason > 0)
         VG_(printf) (" libhb: stats__cached_rcec diff known reason %'lu\n",
                      stats__cached_rcec_diff_known_reason);

      {
         /* Histogram of contextTab hash-chain lengths; the final
            bucket accumulates all chains of length >= MAXCHAIN. */
#        define MAXCHAIN 10
         UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
         UInt non0chain = 0;
         UInt n;
         UInt i;
         RCEC *p;

         for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
         for (i = 0; i < N_RCEC_TAB; i++) {
            n = 0;
            for (p = contextTab[i]; p; p = p->next)
               n++;
            if (n < MAXCHAIN)
               chains[n]++;
            else
               chains[MAXCHAIN]++;
            if (n > 0)
               non0chain++;
         }
         VG_(printf)( " libhb: contextTab chain of [length]=nchain."
                      " Avg chain len %3.1f\n"
                      " ",
                      (Double)stats__ctxt_tab_curr
                      / (Double)(non0chain ? non0chain : 1));
         for (i = 0; i <= MAXCHAIN; i++) {
            if (chains[i] != 0)
               VG_(printf)( "[%u%s]=%u ",
                            i, i == MAXCHAIN ? "+" : "",
                            chains[i]);
         }
         VG_(printf)( "\n");
#        undef MAXCHAIN
      }
      VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}
6870
6871 /* Receive notification that a thread has low level exited. The
6872 significance here is that we do not expect to see any more memory
6873 references from it. */
void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   thr->llexit_done = True;

   /* Check nobody messed up with the cached_rcec */
   tl_assert (thr->cached_rcec.magic == RCEC_MAGIC);
   tl_assert (thr->cached_rcec.rc == 0);
   tl_assert (thr->cached_rcec.rcX == 0);
   tl_assert (thr->cached_rcec.next == NULL);

   /* Just to be sure, declare the cached stack invalid. */
   set_cached_rcec_validity(thr, False);

   /* free up Filter and local_Kws_n_stacks (well, actually not the
      latter ..).  NB: other code (e.g. libhb_so_send) tests
      thr->llexit_done before touching thr->filter, since it is NULL
      from here on. */
   tl_assert(thr->filter);
   HG_(free)(thr->filter);
   thr->filter = NULL;

   /* Tell the VTS mechanism this thread has exited, so it can
      participate in VTS pruning.  Note this can only happen if the
      thread has both ll_exited and has been joined with. */
   if (thr->joinedwith_done)
      VTS__declare_thread_very_dead(thr);

   /* Another space-accuracy tradeoff.  Do we want to be able to show
      H1 history for conflicts in threads which have since exited?  If
      yes, then we better not free up thr->local_Kws_n_stacks.  The
      downside is a potential per-thread leak of up to
      N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
      XArray average overcommit factor is (1.5 I'd guess). */
   // hence:
   // VG_(deleteXA)(thr->local_Kws_n_stacks);
   // thr->local_Kws_n_stacks = NULL;
}
6911
6912 /* Receive notification that a thread has been joined with. The
6913 significance here is that we do not expect to see any further
6914 references to its vector clocks (Thr::viR and Thr::viW). */
void libhb_joinedwith_done ( Thr* thr )
{
   tl_assert(thr);
   /* Caller must ensure that this is only ever called once per Thr. */
   tl_assert(!thr->joinedwith_done);
   thr->joinedwith_done = True;
   /* Once a thread has both ll-exited and been joined with, its
      clocks can never be referenced again, so it may take part in
      VTS pruning (cf. the mirror-image test in libhb_async_exit). */
   if (thr->llexit_done)
      VTS__declare_thread_very_dead(thr);
}
6924
6925
6926 /* Both Segs and SOs point to VTSs. However, there is no sharing, so
6927 a Seg that points at a VTS is its one-and-only owner, and ditto for
6928 a SO that points at a VTS. */
6929
/* Allocate a fresh sync object.  Caller owns it and must eventually
   release it with libhb_so_dealloc. */
SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}
6934
/* Release a sync object previously obtained from libhb_so_alloc.
   'so' must be valid (magic-checked) and must not be used again. */
void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}
6941
6942 /* See comments in libhb.h for details on the meaning of
6943 strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      /* First ever send on this SO: just install the sender's
         clocks; strong vs weak makes no difference here. */
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   /* An ll-exited thread has had its filter freed (libhb_async_exit),
      so skip the filter clear and the Kw/stack note in that case. */
   if (!thr->llexit_done) {
      Filter__clear(thr->filter, "libhb_so_send");
      note_local_Kw_n_stack_for(thr);
   }
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}
6996
/* Receive from 'so' into 'thr': join the SO's clocks into the
   receiving thread's clocks.  A weak receive advances only the
   thread's read-clock; a strong receive advances both.  If nothing
   has ever been sent on the SO, the receive is a no-op. */
void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* At one point (r10589) it seemed safest to tick the clocks for
         the receiving thread after the join.  But on reflection, I
         wonder if that might cause it to 'overtake' constraints,
         which could lead to missing races.  So, back out that part of
         r10589. */
      //VtsID__rcdec(thr->viR);
      //thr->viR = VtsID__tick( thr->viR, thr );
      //VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* See comment just above, re r10589. */
         //VtsID__rcdec(thr->viW);
         //thr->viW = VtsID__tick( thr->viW, thr );
         //VtsID__rcinc(thr->viW);
      }

      /* The filter may already be gone (ll-exited thread), hence the
         NULL check here, unlike in libhb_so_send. */
      if (thr->filter)
         Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}
7051
libhb_so_everSent(SO * so)7052 Bool libhb_so_everSent ( SO* so )
7053 {
7054 if (so->viR == VtsID_INVALID) {
7055 tl_assert(so->viW == VtsID_INVALID);
7056 return False;
7057 } else {
7058 tl_assert(so->viW != VtsID_INVALID);
7059 return True;
7060 }
7061 }
7062
7063 #define XXX1 0 // 0x67a106c
7064 #define XXX2 0
7065
TRACEME(Addr a,SizeT szB)7066 static inline Bool TRACEME(Addr a, SizeT szB) {
7067 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
7068 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
7069 return False;
7070 }
/* Debug helper: print the shadow value at 'a' tagged with 's',
   followed by the thread's vector-clock state.  Note only the first
   byte of the (a, szB) range is sampled (zsm_sread08 on 'a'). */
static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
{
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}
7078
/* Mark [a, a+szB) as freshly allocated by 'thr': every byte's shadow
   value becomes C(viW, viW) -- both constraints set to the thread's
   current write clock -- and the thread's filter is flushed for the
   range. */
void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}
7088
/* "No effects" variant of making a range inaccessible: deliberately
   leaves the shadow state untouched.  The _AHAE variant below does
   the real (expensive) work. */
void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}
7093
7094
7095 /* Set the lines zix_start till zix_end to NOACCESS. */
/* Set the Z lines zix_start..zix_end (inclusive) of 'sm' to
   SVal_NOACCESS, in place. */
static void zsm_secmap_line_range_noaccess (SecMap *sm,
                                            UInt zix_start, UInt zix_end)
{
   for (UInt zix = zix_start; zix <= zix_end; zix++) {
      LineZ* line = &sm->linesZ[zix];
      if (line->dict[0] == SVal_INVALID) {
         /* F-representation: detach and release the LineF. */
         clear_LineF_of_Z(line);
      } else {
         /* Z-representation: drop refs on the old dict, then make
            dict[0] the sole (NOACCESS) entry. */
         rcdec_LineZ(line);
         line->dict[0] = SVal_NOACCESS;
         line->dict[1] = line->dict[2] = line->dict[3] = SVal_INVALID;
      }
      /* Point every 2-bit selector at dict[0]. */
      for (UInt k = 0; k < N_LINE_ARANGE/4; k++)
         line->ix2s[k] = 0;
   }
}
7113
7114 /* Set the given range to SVal_NOACCESS in-place in the secmap.
7115 a must be cacheline aligned. len must be a multiple of a cacheline
7116 and must be < N_SECMAP_ARANGE. */
static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
{
   tl_assert (is_valid_scache_tag (a));
   tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
   tl_assert (len < N_SECMAP_ARANGE);

   /* Because len < N_SECMAP_ARANGE, the range spans at most two
      SecMaps: sm1 holds the first byte, sm2 the last. */
   SecMap *sm1 = shmem__find_SecMap (a);
   SecMap *sm2 = shmem__find_SecMap (a + len - 1);
   UWord zix_start = shmem__get_SecMap_offset(a ) >> N_LINE_BITS;
   UWord zix_end = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;

   /* A NULL SecMap means the region was never materialised, i.e. is
      already implicitly NOACCESS -- nothing to do for that part. */
   if (sm1) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
      /* If both ends live in the same SecMap, stop at zix_end;
         otherwise run to sm1's last line. */
      zsm_secmap_line_range_noaccess (sm1, zix_start,
                                      sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
   }
   if (sm2 && sm1 != sm2) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
      zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
   }
}
7138
7139 /* Set the given address range to SVal_NOACCESS.
7140 The SecMaps fully set to SVal_NOACCESS will be pushed in SecMap_freelist. */
/* Set [addr, addr+len) to SVal_NOACCESS.  The range is decomposed
   into five aligned pieces (see diagram below); fully-covered
   SecMaps are released onto SecMap_freelist. */
static void zsm_sset_range_noaccess (Addr addr, SizeT len)
{
   /*
      BPC = Before, Partial Cacheline, = addr
            (i.e. starting inside a cacheline/inside a SecMap)
      BFC = Before, Full Cacheline(s), but not full SecMap
            (i.e. starting inside a SecMap)
      FSM = Full SecMap(s)
            (i.e. starting a SecMap)
      AFC = After, Full Cacheline(s), but not full SecMap
            (i.e. first address after the full SecMap(s))
      APC = After, Partial Cacheline, i.e. first address after the
            full CacheLines).
      ARE = After Range End = addr+len = first address not part of the range.

      If addr starts a Cacheline, then BPC == BFC.
      If addr starts a SecMap, then BPC == BFC == FSM.
      If addr+len starts a SecMap, then APC == ARE == AFC
      If addr+len starts a Cacheline, then APC == ARE
   */
   Addr ARE = addr + len;
   Addr BPC = addr;
   Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
   Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
   Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
   Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
   SizeT Plen = len; // Plen will be split between the following:
   SizeT BPClen;
   SizeT BFClen;
   SizeT FSMlen;
   SizeT AFClen;
   SizeT APClen;

   /* Consumes from Plen the nr of bytes between from and to.
      from and to must be aligned on a multiple of round.
      The length consumed will be a multiple of round, with
      a maximum of Plen. */
#  define PlenCONSUME(from, to, round, consumed) \
   do {                                          \
   if (from < to) {                              \
      if (to - from < Plen)                      \
         consumed = to - from;                   \
      else                                       \
         consumed = ROUNDDN(Plen, round);        \
   } else {                                      \
      consumed = 0;                              \
   }                                             \
   Plen -= consumed; } while (0)

   PlenCONSUME(BPC, BFC, 1, BPClen);
   PlenCONSUME(BFC, FSM, N_LINE_ARANGE, BFClen);
   PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
   PlenCONSUME(AFC, APC, N_LINE_ARANGE, AFClen);
   PlenCONSUME(APC, ARE, 1, APClen);

   if (0)
      VG_(printf) ("addr %p[%lu] ARE %p"
                   " BPC %p[%lu] BFC %p[%lu] FSM %p[%lu]"
                   " AFC %p[%lu] APC %p[%lu]\n",
                   (void*)addr, len, (void*)ARE,
                   (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
                   (void*)AFC, AFClen, (void*)APC, APClen);

   /* The five pieces must account for the whole range. */
   tl_assert (Plen == 0);

   /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */

   /* First we set the partial cachelines. This is done through the cache. */
   if (BPClen > 0)
      zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
   if (APClen > 0)
      zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);

   /* After this, we will not use the cache anymore. We will directly work
      in-place on the z shadow memory in SecMap(s).
      So, we invalidate the cachelines for the whole range we are setting
      to NOACCESS below. */
   shmem__invalidate_scache_range (BFC, APC - BFC);

   if (BFClen > 0)
      zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
   if (AFClen > 0)
      zsm_sset_range_noaccess_in_secmap (AFC, AFClen);

   if (FSMlen > 0) {
      /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
         free list. */
      Addr  sm_start = FSM;
      while (sm_start < AFC) {
         SecMap *sm = shmem__find_SecMap (sm_start);
         if (sm) {
            Addr gaKey;
            SecMap *fm_sm;

            if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
            /* Drop the references held by every line before the
               SecMap is recycled. */
            for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
               LineZ *lineZ = &sm->linesZ[lz];
               if (LIKELY(lineZ->dict[0] != SVal_INVALID))
                  rcdec_LineZ(lineZ);
               else
                  clear_LineF_of_Z(lineZ);
            }
            if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
               tl_assert (0);
            stats__secmaps_in_map_shmem--;
            tl_assert (gaKey == sm_start);
            tl_assert (sm == fm_sm);
            stats__secmaps_ssetGCed++;
            push_SecMap_on_freelist (sm);
         }
         sm_start += N_SECMAP_ARANGE;
      }
      tl_assert (sm_start == AFC);

      /* The above loop might have kept copies of freed SecMap in the smCache.
         => invalidate any entry whose key falls in the freed range.
         (Loop driven by the array size, so it stays correct if the
         cache ever grows.) */
      for (UInt i = 0; i < sizeof(smCache)/sizeof(smCache[0]); i++) {
         if (address_in_range(smCache[i].gaKey, FSM, FSMlen)) {
            smCache[i].gaKey = 1;
            smCache[i].sm = NULL;
         }
      }
   }
}
7272
/* "Actually Has An Effect" variant: genuinely puts [a, a+szB) into
   NoAccess.  Expensive for large ranges. */
void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   /* Big ranges are done in-place on the SecMaps (and may free whole
      SecMaps); small ones go through the cacheline cache. */
   if (UNLIKELY(szB >= 2 * N_LINE_ARANGE))
      zsm_sset_range_noaccess (a, szB);
   else
      zsm_sset_range_SMALL (a, szB, sv);
   Filter__clear_range( thr->filter, a, szB );
}
7285
7286 /* Works byte at a time. Can be optimised if needed. */
libhb_srange_get_abits(Addr a,UChar * abits,SizeT len)7287 UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
7288 {
7289 UWord anr = 0; // nr of bytes addressable.
7290
7291 /* Get the accessibility of each byte. Pay attention to not
7292 create SecMap or LineZ when checking if a byte is addressable.
7293
7294 Note: this is used for client request. Performance deemed not critical.
7295 So for simplicity, we work byte per byte.
7296 Performance could be improved by working with full cachelines
7297 or with full SecMap, when reaching a cacheline or secmap boundary. */
7298 for (SizeT i = 0; i < len; i++) {
7299 SVal sv = SVal_INVALID;
7300 Addr b = a + i;
7301 Addr tag = b & ~(N_LINE_ARANGE - 1);
7302 UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
7303 UWord cloff = get_cacheline_offset(b);
7304
7305 /* Note: we do not use get_cacheline(b) to avoid creating cachelines
7306 and/or SecMap for non addressable bytes. */
7307 if (tag == cache_shmem.tags0[wix]) {
7308 CacheLine copy = cache_shmem.lyns0[wix];
7309 /* We work on a copy of the cacheline, as we do not want to
7310 record the client request as a real read.
7311 The below is somewhat similar to zsm_sapply08__msmcread but
7312 avoids side effects on the cache. */
7313 UWord toff = get_tree_offset(b); /* == 0 .. 7 */
7314 UWord tno = get_treeno(b);
7315 UShort descr = copy.descrs[tno];
7316 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
7317 SVal* tree = ©.svals[tno << 3];
7318 copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
7319 }
7320 sv = copy.svals[cloff];
7321 } else {
7322 /* Byte not found in the cacheline. Search for a SecMap. */
7323 SecMap *sm = shmem__find_SecMap(b);
7324 LineZ *lineZ;
7325 if (sm == NULL)
7326 sv = SVal_NOACCESS;
7327 else {
7328 UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
7329 lineZ = &sm->linesZ[zix];
7330 if (lineZ->dict[0] == SVal_INVALID) {
7331 LineF *lineF = SVal2Ptr(lineZ->dict[1]);
7332 sv = lineF->w64s[cloff];
7333 } else {
7334 UWord ix = read_twobit_array( lineZ->ix2s, cloff );
7335 sv = lineZ->dict[ix];
7336 }
7337 }
7338 }
7339
7340 tl_assert (sv != SVal_INVALID);
7341 if (sv == SVal_NOACCESS) {
7342 if (abits)
7343 abits[i] = 0x00;
7344 } else {
7345 if (abits)
7346 abits[i] = 0xff;
7347 anr++;
7348 }
7349 }
7350
7351 return anr;
7352 }
7353
7354
/* Stop tracking [a, a+szB): sets the range to SVal_NOACCESS (same
   machinery as libhb_srange_noaccess_AHAE) and flushes the thread's
   filter for the range. */
void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   /* Small ranges via the cacheline cache; big ranges in-place on
      the SecMaps. */
   if (LIKELY(szB < 2 * N_LINE_ARANGE))
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   else
      zsm_sset_range_noaccess (a, szB);
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}
7367
/* Accessor: return the Helgrind-level Thread associated with 'thr'. */
Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
   tl_assert(thr);
   return thr->hgthread;
}
7372
/* Accessor: bind the Helgrind-level Thread 'hgthread' to 'thr'. */
void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
   tl_assert(thr);
   thr->hgthread = hgthread;
}
7377
/* Copy len bytes of shadow state from src to dst (e.g. for a client
   memmove), then flush thr's filter for the destination range. */
void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}
7383
/* Opportunistically garbage-collect the three big data structures
   (RCEC context table, vts_tab, SecMaps) when their respective
   thresholds are hit. */
void libhb_maybe_GC ( void )
{
   /* GC the unreferenced (zero rc) RCECs when
         (1) reaching a significant nr of RCECs (to avoid scanning a contextTab
             with mostly NULL ptr)
     and (2) approaching the max nr of RCEC (as we have in any case
             at least that amount of RCEC in the pool allocator)
             Note: the margin allows to avoid a small but constant increase
             of the max nr of RCEC due to the fact that libhb_maybe_GC is
             not called when the current nr of RCEC exactly reaches the max.
     and (3) the nr of referenced RCECs is less than 75% than total nr RCECs.
     Keeping the nr of RCECs low keeps memory use low, and avoids
     having too many elements in the (fixed) contextTab hashtable.
   */
   if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
                && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
                && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
      do_RCEC_GC();

   /* If there are still no entries available (all the table entries are full),
      and we hit the threshold point, then do a GC */
   Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
                     && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
   if (UNLIKELY (vts_tab_GC))
      vts_tab__do_GC( False/*don't show stats*/ );

   /* scan GC the SecMaps when
         (1) no SecMap in the freelist
     and (2) the current nr of live secmaps exceeds the threshold. */
   if (UNLIKELY(SecMap_freelist == NULL
                && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
      // If we did a vts tab GC, then no need to flush the cache again.
      if (!vts_tab_GC)
         zsm_flush_cache();
      shmem__SecMap_do_GC(True);
   }

   /* Check the reference counts (expensive) */
   if (CHECK_CEM)
      event_map__check_reference_counts();
}
7425
7426
7427 /////////////////////////////////////////////////////////////////
7428 /////////////////////////////////////////////////////////////////
7429 // //
7430 // SECTION END main library //
7431 // //
7432 /////////////////////////////////////////////////////////////////
7433 /////////////////////////////////////////////////////////////////
7434
7435 /*--------------------------------------------------------------------*/
7436 /*--- end libhb_main.c ---*/
7437 /*--------------------------------------------------------------------*/
7438