1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
2 |*
3 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 |* See https://llvm.org/LICENSE.txt for license information.
5 |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 |*
7 \*===----------------------------------------------------------------------===*/
8 
9 #include <assert.h>
10 #include <limits.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include "InstrProfiling.h"
16 #include "InstrProfilingInternal.h"
17 #include "InstrProfilingUtil.h"
18 
19 #define INSTR_PROF_VALUE_PROF_DATA
20 #define INSTR_PROF_COMMON_API_IMPL
21 #define INSTR_PROF_VALUE_PROF_MEMOP_API
22 #include "profile/InstrProfData.inc"
23 
24 static int hasStaticCounters = 1;
25 static int OutOfNodesWarnings = 0;
26 static int hasNonDefaultValsPerSite = 0;
27 #define INSTR_PROF_MAX_VP_WARNS 10
28 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
29 #define INSTR_PROF_VNODE_POOL_SIZE 1024
30 
31 #ifndef _MSC_VER
32 /* A shared static pool in addition to the vnodes statically
33  * allocated by the compiler.  */
34 COMPILER_RT_VISIBILITY ValueProfNode
35     lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
36        COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
37 #endif
38 
39 COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
40     INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
41 
lprofSetupValueProfiler(void)42 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) {
43   const char *Str = 0;
44   Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
45   if (Str && Str[0]) {
46     VPMaxNumValsPerSite = atoi(Str);
47     hasNonDefaultValsPerSite = 1;
48   }
49   if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
50     VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
51 }
52 
lprofSetMaxValsPerSite(uint32_t MaxVals)53 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
54   VPMaxNumValsPerSite = MaxVals;
55   hasNonDefaultValsPerSite = 1;
56 }
57 
58 /* This method is only used in value profiler mock testing.  */
59 COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data * Data,uint32_t ValueKind,uint16_t NumValueSites)60 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
61                                    uint32_t ValueKind, uint16_t NumValueSites) {
62 #ifdef __GNUC__
63 #pragma GCC diagnostic push
64 #pragma GCC diagnostic ignored "-Wcast-qual"
65 #elif defined(__clang__)
66 #pragma clang diagnostic push
67 #pragma clang diagnostic ignored "-Wcast-qual"
68 #endif
69   *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
70 #ifdef __GNUC__
71 #pragma GCC diagnostic pop
72 #elif defined(__clang__)
73 #pragma clang diagnostic pop
74 #endif
75 }
76 
77 /* This method is only used in value profiler mock testing.  */
78 COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_iterate_data(const __llvm_profile_data * Data)79 __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
80   return Data + 1;
81 }
82 
83 /* This method is only used in value profiler mock testing.  */
84 COMPILER_RT_VISIBILITY void *
__llvm_get_function_addr(const __llvm_profile_data * Data)85 __llvm_get_function_addr(const __llvm_profile_data *Data) {
86   return Data->FunctionPointer;
87 }
88 
89 /* Allocate an array that holds the pointers to the linked lists of
90  * value profile counter nodes. The number of element of the array
91  * is the total number of value profile sites instrumented. Returns
92  * 0 if allocation fails.
93  */
94 
allocateValueProfileCounters(__llvm_profile_data * Data)95 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
96   uint64_t NumVSites = 0;
97   uint32_t VKI;
98 
99   /* This function will never be called when value site array is allocated
100      statically at compile time.  */
101   hasStaticCounters = 0;
102   /* When dynamic allocation is enabled, allow tracking the max number of
103    * values allowd.  */
104   if (!hasNonDefaultValsPerSite)
105     VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
106 
107   for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
108     NumVSites += Data->NumValueSites[VKI];
109 
110   // If NumVSites = 0, calloc is allowed to return a non-null pointer.
111   assert(NumVSites > 0 && "NumVSites can't be zero");
112   ValueProfNode **Mem =
113       (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
114   if (!Mem)
115     return 0;
116   if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
117     free(Mem);
118     return 0;
119   }
120   return 1;
121 }
122 
allocateOneNode(void)123 static ValueProfNode *allocateOneNode(void) {
124   ValueProfNode *Node;
125 
126   if (!hasStaticCounters)
127     return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
128 
129   /* Early check to avoid value wrapping around.  */
130   if (CurrentVNode + 1 > EndVNode) {
131     if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
132       PROF_WARN("Unable to track new values: %s. "
133                 " Consider using option -mllvm -vp-counters-per-site=<n> to "
134                 "allocate more"
135                 " value profile counters at compile time. \n",
136                 "Running out of static counters");
137     }
138     return 0;
139   }
140   Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
141   /* Due to section padding, EndVNode point to a byte which is one pass
142    * an incomplete VNode, so we need to skip the last incomplete node. */
143   if (Node + 1 > EndVNode)
144     return 0;
145 
146   return Node;
147 }
148 
149 static COMPILER_RT_ALWAYS_INLINE void
instrumentTargetValueImpl(uint64_t TargetValue,void * Data,uint32_t CounterIndex,uint64_t CountValue)150 instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
151                           uint32_t CounterIndex, uint64_t CountValue) {
152   __llvm_profile_data *PData = (__llvm_profile_data *)Data;
153   if (!PData)
154     return;
155   if (!CountValue)
156     return;
157   if (!PData->Values) {
158     if (!allocateValueProfileCounters(PData))
159       return;
160   }
161 
162   ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
163   ValueProfNode *PrevVNode = NULL;
164   ValueProfNode *MinCountVNode = NULL;
165   ValueProfNode *CurVNode = ValueCounters[CounterIndex];
166   uint64_t MinCount = UINT64_MAX;
167 
168   uint8_t VDataCount = 0;
169   while (CurVNode) {
170     if (TargetValue == CurVNode->Value) {
171       CurVNode->Count += CountValue;
172       return;
173     }
174     if (CurVNode->Count < MinCount) {
175       MinCount = CurVNode->Count;
176       MinCountVNode = CurVNode;
177     }
178     PrevVNode = CurVNode;
179     CurVNode = CurVNode->Next;
180     ++VDataCount;
181   }
182 
183   if (VDataCount >= VPMaxNumValsPerSite) {
184     /* Bump down the min count node's count. If it reaches 0,
185      * evict it. This eviction/replacement policy makes hot
186      * targets more sticky while cold targets less so. In other
187      * words, it makes it less likely for the hot targets to be
188      * prematurally evicted during warmup/establishment period,
189      * when their counts are still low. In a special case when
190      * the number of values tracked is reduced to only one, this
191      * policy will guarantee that the dominating target with >50%
192      * total count will survive in the end. Note that this scheme
193      * allows the runtime to track the min count node in an adaptive
194      * manner. It can correct previous mistakes and eventually
195      * lock on a cold target that is alread in stable state.
196      *
197      * In very rare cases,  this replacement scheme may still lead
198      * to target loss. For instance, out of \c N value slots, \c N-1
199      * slots are occupied by luke warm targets during the warmup
200      * period and the remaining one slot is competed by two or more
201      * very hot targets. If those hot targets occur in an interleaved
202      * way, none of them will survive (gain enough weight to throw out
203      * other established entries) due to the ping-pong effect.
204      * To handle this situation, user can choose to increase the max
205      * number of tracked values per value site. Alternatively, a more
206      * expensive eviction mechanism can be implemented. It requires
207      * the runtime to track the total number of evictions per-site.
208      * When the total number of evictions reaches certain threshold,
209      * the runtime can wipe out more than one lowest count entries
210      * to give space for hot targets.
211      */
212     if (MinCountVNode->Count <= CountValue) {
213       CurVNode = MinCountVNode;
214       CurVNode->Value = TargetValue;
215       CurVNode->Count = CountValue;
216     } else
217       MinCountVNode->Count -= CountValue;
218 
219     return;
220   }
221 
222   CurVNode = allocateOneNode();
223   if (!CurVNode)
224     return;
225   CurVNode->Value = TargetValue;
226   CurVNode->Count += CountValue;
227 
228   uint32_t Success = 0;
229   if (!ValueCounters[CounterIndex])
230     Success =
231         COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
232   else if (PrevVNode && !PrevVNode->Next)
233     Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
234 
235   if (!Success && !hasStaticCounters) {
236     free(CurVNode);
237     return;
238   }
239 }
240 
241 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue,void * Data,uint32_t CounterIndex)242 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
243                                  uint32_t CounterIndex) {
244   instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
245 }
246 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target_value(uint64_t TargetValue,void * Data,uint32_t CounterIndex,uint64_t CountValue)247 __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
248                                        uint32_t CounterIndex,
249                                        uint64_t CountValue) {
250   instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
251 }
252 
253 /*
254  * The target values are partitioned into multiple ranges. The range spec is
255  * defined in InstrProfData.inc.
256  */
257 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_memop(uint64_t TargetValue,void * Data,uint32_t CounterIndex)258 __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
259                                 uint32_t CounterIndex) {
260   // Map the target value to the representative value of its range.
261   uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
262   __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
263 }
264 
265 /*
266  * A wrapper struct that represents value profile runtime data.
267  * Like InstrProfRecord class which is used by profiling host tools,
268  * ValueProfRuntimeRecord also implements the abstract interfaces defined in
269  * ValueProfRecordClosure so that the runtime data can be serialized using
270  * shared C implementation.
271  */
272 typedef struct ValueProfRuntimeRecord {
273   const __llvm_profile_data *Data;
274   ValueProfNode **NodesKind[IPVK_Last + 1];
275   uint8_t **SiteCountArray;
276 } ValueProfRuntimeRecord;
277 
278 /* ValueProfRecordClosure Interface implementation. */
279 
getNumValueSitesRT(const void * R,uint32_t VK)280 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
281   return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
282 }
283 
getNumValueDataRT(const void * R,uint32_t VK)284 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
285   uint32_t S = 0, I;
286   const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
287   if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
288     return 0;
289   for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
290     S += Record->SiteCountArray[VK][I];
291   return S;
292 }
293 
getNumValueDataForSiteRT(const void * R,uint32_t VK,uint32_t S)294 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
295                                          uint32_t S) {
296   const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
297   return Record->SiteCountArray[VK][S];
298 }
299 
300 static ValueProfRuntimeRecord RTRecord;
301 static ValueProfRecordClosure RTRecordClosure = {
302     &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */
303     getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT,
304     INSTR_PROF_NULLPTR, /* RemapValueData */
305     INSTR_PROF_NULLPTR, /* GetValueForSite, */
306     INSTR_PROF_NULLPTR  /* AllocValueProfData */
307 };
308 
309 static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data * Data,uint8_t * SiteCountArray[])310 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
311                                  uint8_t *SiteCountArray[]) {
312   unsigned I, J, S = 0, NumValueKinds = 0;
313   ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
314   RTRecord.Data = Data;
315   RTRecord.SiteCountArray = SiteCountArray;
316   for (I = 0; I <= IPVK_Last; I++) {
317     uint16_t N = Data->NumValueSites[I];
318     if (!N)
319       continue;
320 
321     NumValueKinds++;
322 
323     RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
324     for (J = 0; J < N; J++) {
325       /* Compute value count for each site. */
326       uint32_t C = 0;
327       ValueProfNode *Site =
328           Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
329       while (Site) {
330         C++;
331         Site = Site->Next;
332       }
333       if (C > UCHAR_MAX)
334         C = UCHAR_MAX;
335       RTRecord.SiteCountArray[I][J] = C;
336     }
337     S += N;
338   }
339   return NumValueKinds;
340 }
341 
getNextNValueData(uint32_t VK,uint32_t Site,InstrProfValueData * Dst,ValueProfNode * StartNode,uint32_t N)342 static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
343                                         InstrProfValueData *Dst,
344                                         ValueProfNode *StartNode, uint32_t N) {
345   unsigned I;
346   ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
347   for (I = 0; I < N; I++) {
348     Dst[I].Value = VNode->Value;
349     Dst[I].Count = VNode->Count;
350     VNode = VNode->Next;
351   }
352   return VNode;
353 }
354 
getValueProfDataSizeWrapper(void)355 static uint32_t getValueProfDataSizeWrapper(void) {
356   return getValueProfDataSize(&RTRecordClosure);
357 }
358 
getNumValueDataForSiteWrapper(uint32_t VK,uint32_t S)359 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
360   return getNumValueDataForSiteRT(&RTRecord, VK, S);
361 }
362 
363 static VPDataReaderType TheVPDataReader = {
364     initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
365     getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
366     getValueProfDataSizeWrapper,      getNextNValueData};
367 
lprofGetVPDataReader(void)368 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) {
369   return &TheVPDataReader;
370 }
371