1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
2 |*
3 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 |* See https://llvm.org/LICENSE.txt for license information.
5 |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 |*
7 \*===----------------------------------------------------------------------===*/
8 
9 #include <assert.h>
10 #include <limits.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include "InstrProfiling.h"
16 #include "InstrProfilingInternal.h"
17 #include "InstrProfilingUtil.h"
18 
19 #define INSTR_PROF_VALUE_PROF_DATA
20 #define INSTR_PROF_COMMON_API_IMPL
21 #define INSTR_PROF_VALUE_PROF_MEMOP_API
22 #include "profile/InstrProfData.inc"
23 
/* Nonzero until allocateValueProfileCounters() runs; while nonzero, new value
 * nodes are taken from the static vnode range instead of the heap. */
static int hasStaticCounters = 1;
/* Number of times the static node range was found exhausted; used to stop
 * printing the warning after INSTR_PROF_MAX_VP_WARNS occurrences. */
static int OutOfNodesWarnings = 0;
/* Set to 1 once the per-site limit has been overridden, either through the
 * environment or through lprofSetMaxValsPerSite(). */
static int hasNonDefaultValsPerSite = 0;
/* Upper bound on the number of "out of static counters" warnings emitted. */
#define INSTR_PROF_MAX_VP_WARNS 10
/* Initial value of VPMaxNumValsPerSite. */
#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
/* Number of elements in the lprofValueProfNodes pool. */
#define INSTR_PROF_VNODE_POOL_SIZE 1024
30 
#ifndef _MSC_VER
/* A shared static pool in addition to the vnodes statically
 * allocated by the compiler.  The array is placed in the vnodes section
 * (INSTR_PROF_VNODES_SECT_NAME) and is not defined when building with
 * MSVC.  */
COMPILER_RT_VISIBILITY ValueProfNode
    lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
       COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
#endif
38 
/* Maximum number of distinct values tracked per value site.  Once a site
 * holds this many nodes, instrumentTargetValueImpl() stops appending and
 * applies its replacement policy instead.  */
COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
    INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
41 
lprofSetupValueProfiler(void)42 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) {
43   const char *Str = 0;
44   Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
45   if (Str && Str[0]) {
46     VPMaxNumValsPerSite = atoi(Str);
47     hasNonDefaultValsPerSite = 1;
48   }
49   if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
50     VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
51 }
52 
lprofSetMaxValsPerSite(uint32_t MaxVals)53 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
54   VPMaxNumValsPerSite = MaxVals;
55   hasNonDefaultValsPerSite = 1;
56 }
57 
58 /* This method is only used in value profiler mock testing.  */
59 COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data * Data,uint32_t ValueKind,uint16_t NumValueSites)60 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
61                                    uint32_t ValueKind, uint16_t NumValueSites) {
62   *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
63 }
64 
65 /* This method is only used in value profiler mock testing.  */
66 COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_iterate_data(const __llvm_profile_data * Data)67 __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
68   return Data + 1;
69 }
70 
71 /* This method is only used in value profiler mock testing.  */
72 COMPILER_RT_VISIBILITY void *
__llvm_get_function_addr(const __llvm_profile_data * Data)73 __llvm_get_function_addr(const __llvm_profile_data *Data) {
74   return Data->FunctionPointer;
75 }
76 
77 /* Allocate an array that holds the pointers to the linked lists of
78  * value profile counter nodes. The number of element of the array
79  * is the total number of value profile sites instrumented. Returns
80  * 0 if allocation fails.
81  */
82 
allocateValueProfileCounters(__llvm_profile_data * Data)83 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
84   uint64_t NumVSites = 0;
85   uint32_t VKI;
86 
87   /* This function will never be called when value site array is allocated
88      statically at compile time.  */
89   hasStaticCounters = 0;
90   /* When dynamic allocation is enabled, allow tracking the max number of
91    * values allowd.  */
92   if (!hasNonDefaultValsPerSite)
93     VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
94 
95   for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
96     NumVSites += Data->NumValueSites[VKI];
97 
98   // If NumVSites = 0, calloc is allowed to return a non-null pointer.
99   assert(NumVSites > 0 && "NumVSites can't be zero");
100   ValueProfNode **Mem =
101       (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
102   if (!Mem)
103     return 0;
104   if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
105     free(Mem);
106     return 0;
107   }
108   return 1;
109 }
110 
allocateOneNode(void)111 static ValueProfNode *allocateOneNode(void) {
112   ValueProfNode *Node;
113 
114   if (!hasStaticCounters)
115     return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
116 
117   /* Early check to avoid value wrapping around.  */
118   if (CurrentVNode + 1 > EndVNode) {
119     if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
120       PROF_WARN("Unable to track new values: %s. "
121                 " Consider using option -mllvm -vp-counters-per-site=<n> to "
122                 "allocate more"
123                 " value profile counters at compile time. \n",
124                 "Running out of static counters");
125     }
126     return 0;
127   }
128   Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
129   /* Due to section padding, EndVNode point to a byte which is one pass
130    * an incomplete VNode, so we need to skip the last incomplete node. */
131   if (Node + 1 > EndVNode)
132     return 0;
133 
134   return Node;
135 }
136 
137 static COMPILER_RT_ALWAYS_INLINE void
instrumentTargetValueImpl(uint64_t TargetValue,void * Data,uint32_t CounterIndex,uint64_t CountValue)138 instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
139                           uint32_t CounterIndex, uint64_t CountValue) {
140   __llvm_profile_data *PData = (__llvm_profile_data *)Data;
141   if (!PData)
142     return;
143   if (!CountValue)
144     return;
145   if (!PData->Values) {
146     if (!allocateValueProfileCounters(PData))
147       return;
148   }
149 
150   ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
151   ValueProfNode *PrevVNode = NULL;
152   ValueProfNode *MinCountVNode = NULL;
153   ValueProfNode *CurVNode = ValueCounters[CounterIndex];
154   uint64_t MinCount = UINT64_MAX;
155 
156   uint8_t VDataCount = 0;
157   while (CurVNode) {
158     if (TargetValue == CurVNode->Value) {
159       CurVNode->Count += CountValue;
160       return;
161     }
162     if (CurVNode->Count < MinCount) {
163       MinCount = CurVNode->Count;
164       MinCountVNode = CurVNode;
165     }
166     PrevVNode = CurVNode;
167     CurVNode = CurVNode->Next;
168     ++VDataCount;
169   }
170 
171   if (VDataCount >= VPMaxNumValsPerSite) {
172     /* Bump down the min count node's count. If it reaches 0,
173      * evict it. This eviction/replacement policy makes hot
174      * targets more sticky while cold targets less so. In other
175      * words, it makes it less likely for the hot targets to be
176      * prematurally evicted during warmup/establishment period,
177      * when their counts are still low. In a special case when
178      * the number of values tracked is reduced to only one, this
179      * policy will guarantee that the dominating target with >50%
180      * total count will survive in the end. Note that this scheme
181      * allows the runtime to track the min count node in an adaptive
182      * manner. It can correct previous mistakes and eventually
183      * lock on a cold target that is alread in stable state.
184      *
185      * In very rare cases,  this replacement scheme may still lead
186      * to target loss. For instance, out of \c N value slots, \c N-1
187      * slots are occupied by luke warm targets during the warmup
188      * period and the remaining one slot is competed by two or more
189      * very hot targets. If those hot targets occur in an interleaved
190      * way, none of them will survive (gain enough weight to throw out
191      * other established entries) due to the ping-pong effect.
192      * To handle this situation, user can choose to increase the max
193      * number of tracked values per value site. Alternatively, a more
194      * expensive eviction mechanism can be implemented. It requires
195      * the runtime to track the total number of evictions per-site.
196      * When the total number of evictions reaches certain threshold,
197      * the runtime can wipe out more than one lowest count entries
198      * to give space for hot targets.
199      */
200     if (MinCountVNode->Count <= CountValue) {
201       CurVNode = MinCountVNode;
202       CurVNode->Value = TargetValue;
203       CurVNode->Count = CountValue;
204     } else
205       MinCountVNode->Count -= CountValue;
206 
207     return;
208   }
209 
210   CurVNode = allocateOneNode();
211   if (!CurVNode)
212     return;
213   CurVNode->Value = TargetValue;
214   CurVNode->Count += CountValue;
215 
216   uint32_t Success = 0;
217   if (!ValueCounters[CounterIndex])
218     Success =
219         COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
220   else if (PrevVNode && !PrevVNode->Next)
221     Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
222 
223   if (!Success && !hasStaticCounters) {
224     free(CurVNode);
225     return;
226   }
227 }
228 
229 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue,void * Data,uint32_t CounterIndex)230 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
231                                  uint32_t CounterIndex) {
232   instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
233 }
234 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target_value(uint64_t TargetValue,void * Data,uint32_t CounterIndex,uint64_t CountValue)235 __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
236                                        uint32_t CounterIndex,
237                                        uint64_t CountValue) {
238   instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
239 }
240 
241 /*
242  * The target values are partitioned into multiple ranges. The range spec is
243  * defined in InstrProfData.inc.
244  */
245 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_memop(uint64_t TargetValue,void * Data,uint32_t CounterIndex)246 __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
247                                 uint32_t CounterIndex) {
248   // Map the target value to the representative value of its range.
249   uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
250   __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
251 }
252 
/*
 * A wrapper struct that represents value profile runtime data.
 * Like InstrProfRecord class which is used by profiling host tools,
 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
 * ValueProfRecordClosure so that the runtime data can be serialized using
 * shared C implementation.
 */
typedef struct ValueProfRuntimeRecord {
  /* Profile data record currently being described. */
  const __llvm_profile_data *Data;
  /* Per value kind: pointer to the first site list head of that kind inside
   * the record's site array, or null when the record has no site array.
   * Entries for kinds without sites are not updated by the initializer. */
  ValueProfNode **NodesKind[IPVK_Last + 1];
  /* Caller-provided storage, indexed [kind][site]: number of value nodes at
   * each site, saturated at UCHAR_MAX. */
  uint8_t **SiteCountArray;
} ValueProfRuntimeRecord;
265 
266 /* ValueProfRecordClosure Interface implementation. */
267 
getNumValueSitesRT(const void * R,uint32_t VK)268 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
269   return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
270 }
271 
getNumValueDataRT(const void * R,uint32_t VK)272 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
273   uint32_t S = 0, I;
274   const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
275   if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
276     return 0;
277   for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
278     S += Record->SiteCountArray[VK][I];
279   return S;
280 }
281 
getNumValueDataForSiteRT(const void * R,uint32_t VK,uint32_t S)282 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
283                                          uint32_t S) {
284   const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
285   return Record->SiteCountArray[VK][S];
286 }
287 
/* The single runtime record shared by the reader helpers in this file, and
 * the closure that exposes it to the shared serialization code. The closure
 * carries a pointer to RTRecord and the three *RT callbacks defined above;
 * the remaining slots are left null.
 * NOTE(review): because RTRecord is file-scope state rewritten by
 * initializeValueProfRuntimeRecord(), the helpers using it presumably must
 * not run concurrently -- confirm with the callers.  */
static ValueProfRuntimeRecord RTRecord;
static ValueProfRecordClosure RTRecordClosure = {
    &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */
    getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT,
    INSTR_PROF_NULLPTR, /* RemapValueData */
    INSTR_PROF_NULLPTR, /* GetValueForSite, */
    INSTR_PROF_NULLPTR  /* AllocValueProfData */
};
296 
297 static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data * Data,uint8_t * SiteCountArray[])298 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
299                                  uint8_t *SiteCountArray[]) {
300   unsigned I, J, S = 0, NumValueKinds = 0;
301   ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
302   RTRecord.Data = Data;
303   RTRecord.SiteCountArray = SiteCountArray;
304   for (I = 0; I <= IPVK_Last; I++) {
305     uint16_t N = Data->NumValueSites[I];
306     if (!N)
307       continue;
308 
309     NumValueKinds++;
310 
311     RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
312     for (J = 0; J < N; J++) {
313       /* Compute value count for each site. */
314       uint32_t C = 0;
315       ValueProfNode *Site =
316           Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
317       while (Site) {
318         C++;
319         Site = Site->Next;
320       }
321       if (C > UCHAR_MAX)
322         C = UCHAR_MAX;
323       RTRecord.SiteCountArray[I][J] = C;
324     }
325     S += N;
326   }
327   return NumValueKinds;
328 }
329 
/* Copy the value/count pairs of N consecutive nodes into Dst.
 *
 * Copying starts at StartNode, or at the head of the list for site Site of
 * kind VK in the shared runtime record when StartNode is null. The list must
 * contain at least N nodes from the starting point; nodes are dereferenced
 * without a null check. Returns the node following the last one copied, so
 * a caller can pass it back as StartNode to continue.
 */
static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
                                        InstrProfValueData *Dst,
                                        ValueProfNode *StartNode, uint32_t N) {
  ValueProfNode *Cursor = StartNode;
  InstrProfValueData *Out = Dst;
  uint32_t Remaining = N;

  if (!Cursor)
    Cursor = RTRecord.NodesKind[VK][Site];

  while (Remaining > 0) {
    Out->Value = Cursor->Value;
    Out->Count = Cursor->Count;
    Cursor = Cursor->Next;
    ++Out;
    --Remaining;
  }
  return Cursor;
}
342 
getValueProfDataSizeWrapper(void)343 static uint32_t getValueProfDataSizeWrapper(void) {
344   return getValueProfDataSize(&RTRecordClosure);
345 }
346 
getNumValueDataForSiteWrapper(uint32_t VK,uint32_t S)347 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
348   return getNumValueDataForSiteRT(&RTRecord, VK, S);
349 }
350 
/* Table of the entry points used to read value profile data from the
 * runtime; handed out by lprofGetVPDataReader(). The initializer is
 * positional, so the order of entries must match the field order of
 * VPDataReaderType.  */
static VPDataReaderType TheVPDataReader = {
    initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
    getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
    getValueProfDataSizeWrapper,      getNextNValueData};
355 
lprofGetVPDataReader(void)356 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) {
357   return &TheVPDataReader;
358 }
359